apps/codecs/libffmpegFLAC/shndec.c

   1 /*
   2  * Shorten decoder
   3  * Copyright (c) 2005 Jeff Muizelaar
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  */
  19
  20 /**
 * @file shndec.c
  22  * Shorten decoder
  23  * @author Jeff Muizelaar
  24  *
  25  */
  26
  27 #include "bitstream.h"
  28 #include "golomb.h"
  29 #include "shndec.h"
  30
  31 #define ULONGSIZE 2
  32
  33 #define WAVE_FORMAT_PCM 0x0001
  34
  35 #define TYPESIZE     4
  36 #define CHANSIZE     0
  37 #define LPCQSIZE     2
  38 #define ENERGYSIZE   3
  39 #define BITSHIFTSIZE 2
  40
  41 #define TYPE_S16HL 3  /* signed 16 bit shorts: high-low */
  42 #define TYPE_S16LH 5  /* signed 16 bit shorts: low-high */
  43
  44 #define NWRAP 3
  45 #define NSKIPSIZE 1
  46
  47 #define LPCQUANT 5
  48 #define V2LPCQOFFSET (1 << LPCQUANT)
  49
  50 #define FNSIZE       2
  51
  52 #define VERBATIM_CKSIZE_SIZE  5
  53 #define VERBATIM_BYTE_SIZE    8
  54 #define CANONICAL_HEADER_SIZE 44
  55
  56 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
  57 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
  58 #define MKTAG(a,b,c,d) (a | (b << 8) | (c << 16) | (d << 24))
  59
  60 #define get_le16(gb) bswap_16(get_bits_long(gb, 16))
  61 #define get_le32(gb) bswap_32(get_bits_long(gb, 32))
  62
  63 static uint32_t bswap_32(uint32_t x){
  64     x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
  65     return (x>>16) | (x<<16);
  66 }
  67
  68 static uint16_t bswap_16(uint16_t x){
  69     return (x>>8) | (x<<8);
  70 }
  71
  72 /* converts fourcc string to int */
  73 static int ff_get_fourcc(const char *s){
  74     //assert( strlen(s)==4 );
  75     return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24);
  76 }
  77
  78 static unsigned int get_uint(ShortenContext *s, int k)
  79 {
  80     if (s->version != 0)
  81         k = get_ur_golomb_shorten(&s->gb, ULONGSIZE);
  82     return get_ur_golomb_shorten(&s->gb, k);
  83 }
  84
  85 #if defined(CPU_COLDFIRE)
  86 static void coldfire_lshift_samples(int n, int shift, int32_t *samples) ICODE_ATTR_FLAC;
  87 static void coldfire_lshift_samples(int n, int shift, int32_t *samples)
  88 {
  89 /*
  90     for (i = 0; i < n; i++)
  91         samples[i] =<< shift;
  92 */
  93     asm volatile (
  94             "move.l %[n], %%d0              \n" /* d0 = loop counter */
  95             "asr.l  #2, %%d0                \n"
  96             "beq l1_shift                   \n"
  97         "l2_shift:" /* main loop (unroll by 4) */
  98             "movem.l (%[x]), %%d4-%%d7      \n"
  99             "asl.l   %[s], %%d4             \n"
 100             "asl.l   %[s], %%d5             \n"
 101             "asl.l   %[s], %%d6             \n"
 102             "asl.l   %[s], %%d7             \n"
 103             "movem.l %%d4-%%d7, (%[x])      \n"
 104             "add.l  #16, %[x]               \n"
 105
 106             "subq.l  #1, %%d0               \n"
 107             "bne l2_shift                   \n"
 108         "l1_shift:" /* any loops left? */
 109             "and.l  #3, %[n]                \n"
 110             "beq l4_shift                   \n"
 111         "l3_shift:" /* remaining loops */
 112             "move.l (%[x]), %%d4            \n"
 113             "asl.l  %[s], %%d4              \n"
 114             "move.l %%d4, (%[x])+           \n"
 115
 116             "subq.l #1, %[n]                \n"
 117             "bne l3_shift                   \n"
 118         "l4_shift:" /* exit */
 119         : [n] "+d" (n),         /* d1 */
 120           [s] "+d" (shift),     /* d2 */
 121           [x] "+a" (samples)    /* a0 */
 122         :
 123         : "%d0", "%d4", "%d5", "%d6", "%d7"
 124     );
 125 }
 126 #endif
 127
 128 static inline void fix_bitshift(ShortenContext *s, int32_t *samples)
 129 {
 130     int i;
 131
 132     /* Wrapped samples don't get bitshifted, so we'll do them during
 133        the next iteration. */
 134     if (s->bitshift != 0) {
 135 #if defined(CPU_COLDFIRE)
 136         coldfire_lshift_samples(s->blocksize, s->bitshift, samples - s->nwrap);
 137 #else
 138         for (i = -s->nwrap; i < (s->blocksize - s->nwrap); i++)
 139             samples[i] <<= s->bitshift;
 140 #endif
 141     }
 142
 143     /* Also, when we have to remember to fix the wrapped samples when
 144        the bitshift changes.*/
 145     if (s->bitshift != s->last_bitshift) {
 146         if (s->last_bitshift != 0)
 147             for (i = -s->nwrap; i < 0; i++)
 148                 samples[i] <<= s->last_bitshift;
 149
 150         s->last_bitshift = s->bitshift;
 151     }
 152 }
 153
 154 static inline void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
 155                                        int residual_size, int pred_order)
 156 {
 157     int sum, i, j;
 158     int coeffs[MAX_PRED_ORDER];
 159
 160     for (i=0; i<pred_order; i++) {
 161         coeffs[i] = get_sr_golomb_shorten(&s->gb, LPCQUANT);
 162     }
 163
 164     for (i=0; i < s->blocksize; i++) {
 165         sum = s->lpcqoffset;
 166         for (j=0; j<pred_order; j++)
 167             sum += coeffs[j] * decoded[i-j-1];
 168
 169         decoded[i] =
 170             get_sr_golomb_shorten(&s->gb, residual_size) + (sum >> LPCQUANT);
 171     }
 172 }
 173
 174 static inline int shorten_decode_frame(ShortenContext *s, int32_t *decoded,
 175                                        int32_t *offset)
 176 {
 177     int i;
 178     int32_t sum;
 179
 180     int cmd = get_ur_golomb_shorten(&s->gb, FNSIZE);
 181     switch (cmd) {
 182         case FN_ZERO:
 183         case FN_DIFF0:
 184         case FN_DIFF1:
 185         case FN_DIFF2:
 186         case FN_DIFF3:
 187         case FN_QLPC:
 188         {
 189             int residual_size = 0;
 190             int32_t coffset;
 191             if (cmd != FN_ZERO) {
 192                 residual_size = get_ur_golomb_shorten(&s->gb, ENERGYSIZE);
 193                 /* this is a hack as version 0 differed in defintion of
 194                    get_sr_golomb_shorten */
 195                 if (s->version == 0)
 196                     residual_size--;
 197               }
 198
 199             if (s->nmean == 0) {
 200                 coffset = offset[0];
 201             } else {
 202                 sum = (s->version < 2) ? 0 : s->nmean / 2;
 203                 for (i=0; i<s->nmean; i++)
 204                     sum += offset[i];
 205
 206                 coffset = sum / s->nmean;
 207                 if (s->version >= 2)
 208                     coffset >>= FFMIN(1, s->bitshift);
 209             }
 210
 211             switch (cmd) {
 212                 case FN_ZERO:
 213                     for (i=0; i<s->blocksize; i++)
 214                         decoded[i] = 0;
 215                     break;
 216
 217                 case FN_DIFF0:
 218                     for (i=0; i<s->blocksize; i++)
 219                         decoded[i] =
 220                             get_sr_golomb_shorten(&s->gb, residual_size) +
 221                             coffset;
 222                     break;
 223
 224                 case FN_DIFF1:
 225                     for (i=0; i<s->blocksize; i++)
 226                         decoded[i] =
 227                             get_sr_golomb_shorten(&s->gb, residual_size) +
 228                             decoded[i - 1];
 229                     break;
 230
 231                 case FN_DIFF2:
 232                     for (i=0; i<s->blocksize; i++)
 233                         decoded[i] =
 234                             get_sr_golomb_shorten(&s->gb, residual_size) +
 235                             2*decoded[i-1] - decoded[i-2];
 236                     break;
 237
 238                 case FN_DIFF3:
 239                     for (i=0; i<s->blocksize; i++)
 240                         decoded[i] =
 241                             get_sr_golomb_shorten(&s->gb, residual_size) +
 242                             3*decoded[i-1] - 3*decoded[i-2] + decoded[i-3];
 243                     break;
 244
 245                 case FN_QLPC:
 246                 {
 247                     int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE);
 248                     for (i=0; i<pred_order; i++)
 249                         decoded[i - pred_order] -= coffset;
 250                     decode_subframe_lpc(s, decoded, residual_size, pred_order);
 251                     if (coffset != 0) {
 252                         for (i=0; i < s->blocksize; i++)
 253                             decoded[i] += coffset;
 254                     }
 255                 }
 256             }
 257
 258             if (s->nmean > 0) {
 259                 sum = (s->version < 2) ? 0 : s->blocksize / 2;
 260                 for (i=0; i<s->blocksize; i++)
 261                     sum += decoded[i];
 262
 263                 for (i=1; i<s->nmean; i++)
 264                     offset[i-1] = offset[i];
 265
 266                 if (s->version < 2) {
 267                     offset[s->nmean - 1] = sum / s->blocksize;
 268                 } else {
 269                     offset[s->nmean - 1] =
 270                         (sum / s->blocksize) << s->bitshift;
 271                 }
 272             }
 273
 274             fix_bitshift(s, decoded);
 275             break;
 276         }
 277
 278         case FN_VERBATIM:
 279             i = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
 280             while (i--)
 281                 get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
 282             break;
 283
 284         case FN_BITSHIFT:
 285             s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
 286             break;
 287
 288         case FN_BLOCKSIZE:
 289             s->blocksize = get_uint(s, av_log2(s->blocksize));
 290             break;
 291
 292         case FN_QUIT:
 293             break;
 294
 295         default:
 296             return FN_ERROR;
 297             break;
 298     }
 299
 300     return cmd;
 301 }
 302
 303 int shorten_decode_frames(ShortenContext *s, int *nsamples,
 304                           int32_t *decoded0, int32_t *decoded1,
 305                           int32_t *offset0, int32_t *offset1,
 306                           uint8_t *buf, int buf_size,
 307                           void (*yield)(void))
 308 {
 309     int32_t *decoded, *offset;
 310     int cmd;
 311
 312     *nsamples = 0;
 313
 314     init_get_bits(&s->gb, buf, buf_size*8);
 315     get_bits(&s->gb, s->bitindex);
 316
 317     int n = 0;
 318     while (n < NUM_DEC_LOOPS) {
 319         int chan = n%2;
 320         if (chan == 0) {
 321             decoded = decoded0 + s->nwrap + *nsamples;
 322             offset = offset0;
 323         } else {
 324             decoded = decoded1 + s->nwrap + *nsamples;
 325             offset = offset1;
 326         }
 327
 328         yield();
 329
 330         cmd = shorten_decode_frame(s, decoded, offset);
 331
 332         if (cmd == FN_VERBATIM || cmd == FN_BITSHIFT || cmd == FN_BLOCKSIZE) {
 333             continue;
 334         } else if (cmd == FN_QUIT || cmd == FN_ERROR) {
 335             break;
 336         }
 337
 338         *nsamples += chan * s->blocksize;
 339         n++;
 340     }
 341
 342     if (*nsamples) {
 343         /* Wrap the samples for the next loop */
 344         int i;
 345         for (i = 0; i < s->nwrap; i++) {
 346             decoded0[i] = decoded0[*nsamples + i];
 347             decoded1[i] = decoded1[*nsamples + i];
 348         }
 349
 350         /* Scale the samples for the pcmbuf */
 351         int scale = SHN_OUTPUT_DEPTH - s->bits_per_sample;
 352 #if defined(CPU_COLDFIRE)
 353         coldfire_lshift_samples(*nsamples, scale, decoded0 + s->nwrap);
 354         coldfire_lshift_samples(*nsamples, scale, decoded1 + s->nwrap);
 355 #else
 356         for (i = 0; i < *nsamples; i++) {
 357             decoded0[i + s->nwrap] <<= scale;
 358             decoded1[i + s->nwrap] <<= scale;
 359         }
 360 #endif
 361     }
 362
 363     return cmd;
 364 }
 365
 366 static int decode_wave_header(ShortenContext *s,
 367                               uint8_t *header,
 368                               int header_size)
 369 {
 370     GetBitContext hb;
 371     int len;
 372
 373     init_get_bits(&hb, header, header_size*8);
 374     if (get_le32(&hb) != MKTAG('R','I','F','F')) {
 375         return -8;
 376     }
 377
 378     int chunk_size = get_le32(&hb);
 379
 380     if (get_le32(&hb) != MKTAG('W','A','V','E')) {
 381         return -9;
 382     }
 383
 384     while (get_le32(&hb) != MKTAG('f','m','t',' ')) {
 385         len = get_le32(&hb);
 386         skip_bits(&hb, 8*len);
 387     }
 388
 389     len = get_le32(&hb);
 390     if (len < 16) {
 391         return -10;
 392     }
 393
 394     if (get_le16(&hb) != WAVE_FORMAT_PCM ) {
 395         return -11;
 396     }
 397
 398     s->channels = get_le16(&hb);
 399     if (s->channels > MAX_CHANNELS) {
 400         return -3;
 401     }
 402
 403     s->sample_rate = get_le32(&hb);
 404
 405     skip_bits(&hb, 32);
 406     //s->bit_rate = 8*get_le32(&hb);
 407
 408     int block_align = get_le16(&hb);
 409     s->totalsamples = (chunk_size - 36) / block_align;
 410
 411     s->bits_per_sample = get_le16(&hb);
 412     if (s->bits_per_sample != 16) {
 413         return -12;
 414     }
 415
 416     len -= 16;
 417     if (len > 0) {
 418         return len;
 419     }
 420
 421     return 0;
 422 }
 423
 424 int shorten_init(ShortenContext* s, uint8_t *buf, int buf_size)
 425 {
 426     int i;
 427
 428     s->blocksize = DEFAULT_BLOCK_SIZE;
 429     s->channels = 1;
 430     s->nmean = -1;
 431
 432     init_get_bits(&s->gb, buf, buf_size*8);
 433     get_bits(&s->gb, s->bitindex);
 434
 435     /* shorten signature */
 436     if (get_bits_long(&s->gb, 32) != bswap_32(ff_get_fourcc("ajkg"))) {
 437         return -1;
 438     }
 439
 440     s->version = get_bits(&s->gb, 8);
 441
 442     int internal_ftype = get_uint(s, TYPESIZE);
 443     if ((internal_ftype != TYPE_S16HL) && (internal_ftype != TYPE_S16LH)) {
 444         return -2;
 445     }
 446
 447     s->channels = get_uint(s, CHANSIZE);
 448     if (s->channels > MAX_CHANNELS) {
 449         return -3;
 450     }
 451
 452     /* get blocksize if version > 0 */
 453     int maxnlpc = 0;
 454     if (s->version > 0) {
 455         s->blocksize = get_uint(s, av_log2(DEFAULT_BLOCK_SIZE));
 456         maxnlpc = get_uint(s, LPCQSIZE);
 457         s->nmean = get_uint(s, 0);
 458
 459         int skip_bytes = get_uint(s, NSKIPSIZE);
 460         for (i=0; i<skip_bytes; i++) {
 461             skip_bits(&s->gb, 8);
 462         }
 463     }
 464
 465     if (s->nmean > MAX_NMEAN) {
 466         return -4;
 467     }
 468
 469     s->nwrap = FFMAX(NWRAP, maxnlpc);
 470     if (s->nwrap > MAX_NWRAP) {
 471         return -5;
 472     }
 473
 474     if (s->version > 1)
 475         s->lpcqoffset = V2LPCQOFFSET;
 476
 477     if (get_ur_golomb_shorten(&s->gb, FNSIZE) != FN_VERBATIM) {
 478         return -6;
 479     }
 480
 481     uint8_t header[MAX_HEADER_SIZE];
 482     int header_size = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
 483     if (header_size >= MAX_HEADER_SIZE || header_size < CANONICAL_HEADER_SIZE) {
 484         return -7;
 485     }
 486
 487     for (i=0; i<header_size; i++)
 488         header[i] = (char)get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
 489
 490     s->header_bits = s->gb.index;
 491
 492     return decode_wave_header(s, header, header_size);
 493 }