libavcodec/intrax8.c

   1 /*
   2  * This file is part of Libav.
   3  *
   4  * Libav is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * Libav is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with Libav; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 /**
  20  * @file
  21  * @brief IntraX8 (J-Frame) subdecoder, used by WMV2 and VC-1
  22  */
  23
  24 #include "avcodec.h"
  25 #include "get_bits.h"
  26 #include "idctdsp.h"
  27 #include "msmpeg4data.h"
  28 #include "intrax8huf.h"
  29 #include "intrax8.h"
  30 #include "intrax8dsp.h"
  31 #include "mpegutils.h"
  32
  33 #define MAX_TABLE_DEPTH(table_bits, max_bits) \
  34     ((max_bits + table_bits - 1) / table_bits)
  35
  36 #define DC_VLC_BITS 9
  37 #define AC_VLC_BITS 9
  38 #define OR_VLC_BITS 7
  39
  40 #define DC_VLC_MTD MAX_TABLE_DEPTH(DC_VLC_BITS, MAX_DC_VLC_BITS)
  41 #define AC_VLC_MTD MAX_TABLE_DEPTH(AC_VLC_BITS, MAX_AC_VLC_BITS)
  42 #define OR_VLC_MTD MAX_TABLE_DEPTH(OR_VLC_BITS, MAX_OR_VLC_BITS)
  43
  44 static VLC j_ac_vlc[2][2][8];  // [quant < 13], [intra / inter], [select]
  45 static VLC j_dc_vlc[2][8];     // [quant], [select]
  46 static VLC j_orient_vlc[2][4]; // [quant], [select]
  47
  48 static av_cold int x8_vlc_init(void)
  49 {
  50     int i;
  51     int offset = 0;
  52     int sizeidx = 0;
  53     static const uint16_t sizes[8 * 4 + 8 * 2 + 2 + 4] = {
  54         576, 548, 582, 618, 546, 616, 560, 642,
  55         584, 582, 704, 664, 512, 544, 656, 640,
  56         512, 648, 582, 566, 532, 614, 596, 648,
  57         586, 552, 584, 590, 544, 578, 584, 624,
  58
  59         528, 528, 526, 528, 536, 528, 526, 544,
  60         544, 512, 512, 528, 528, 544, 512, 544,
  61
  62         128, 128, 128, 128, 128, 128,
  63     };
  64
  65     static VLC_TYPE table[28150][2];
  66
  67 // set ac tables
  68 #define init_ac_vlc(dst, src)                                                 \
  69     do {                                                                      \
  70         dst.table           = &table[offset];                                 \
  71         dst.table_allocated = sizes[sizeidx];                                 \
  72         offset             += sizes[sizeidx++];                               \
  73         init_vlc(&dst, AC_VLC_BITS, 77, &src[1], 4, 2, &src[0], 4, 2,         \
  74                  INIT_VLC_USE_NEW_STATIC);                                    \
  75     } while(0)
  76
  77     for (i = 0; i < 8; i++) {
  78         init_ac_vlc(j_ac_vlc[0][0][i], x8_ac0_highquant_table[i][0]);
  79         init_ac_vlc(j_ac_vlc[0][1][i], x8_ac1_highquant_table[i][0]);
  80         init_ac_vlc(j_ac_vlc[1][0][i], x8_ac0_lowquant_table[i][0]);
  81         init_ac_vlc(j_ac_vlc[1][1][i], x8_ac1_lowquant_table[i][0]);
  82     }
  83 #undef init_ac_vlc
  84
  85 // set dc tables
  86 #define init_dc_vlc(dst, src)                                                 \
  87     do {                                                                      \
  88         dst.table           = &table[offset];                                 \
  89         dst.table_allocated = sizes[sizeidx];                                 \
  90         offset             += sizes[sizeidx++];                               \
  91         init_vlc(&dst, DC_VLC_BITS, 34, &src[1], 4, 2, &src[0], 4, 2,         \
  92                  INIT_VLC_USE_NEW_STATIC);                                    \
  93     } while(0)
  94
  95     for (i = 0; i < 8; i++) {
  96         init_dc_vlc(j_dc_vlc[0][i], x8_dc_highquant_table[i][0]);
  97         init_dc_vlc(j_dc_vlc[1][i], x8_dc_lowquant_table[i][0]);
  98     }
  99 #undef init_dc_vlc
 100
 101 // set orient tables
 102 #define init_or_vlc(dst, src)                                                 \
 103     do {                                                                      \
 104         dst.table           = &table[offset];                                 \
 105         dst.table_allocated = sizes[sizeidx];                                 \
 106         offset             += sizes[sizeidx++];                               \
 107         init_vlc(&dst, OR_VLC_BITS, 12, &src[1], 4, 2, &src[0], 4, 2,         \
 108                  INIT_VLC_USE_NEW_STATIC);                                    \
 109     } while(0)
 110
 111     for (i = 0; i < 2; i++)
 112         init_or_vlc(j_orient_vlc[0][i], x8_orient_highquant_table[i][0]);
 113     for (i = 0; i < 4; i++)
 114         init_or_vlc(j_orient_vlc[1][i], x8_orient_lowquant_table[i][0]);
 115 #undef init_or_vlc
 116
 117     if (offset != sizeof(table) / sizeof(VLC_TYPE) / 2) {
 118         av_log(NULL, AV_LOG_ERROR, "table size %zd does not match needed %i\n",
 119                sizeof(table) / sizeof(VLC_TYPE) / 2, offset);
 120         return AVERROR_INVALIDDATA;
 121     }
 122
 123     return 0;
 124 }
 125
 126 static void x8_reset_vlc_tables(IntraX8Context *w)
 127 {
 128     memset(w->j_dc_vlc, 0, sizeof(w->j_dc_vlc));
 129     memset(w->j_ac_vlc, 0, sizeof(w->j_ac_vlc));
 130     w->j_orient_vlc = NULL;
 131 }
 132
 133 static inline void x8_select_ac_table(IntraX8Context *const w, int mode)
 134 {
 135     int table_index;
 136
 137     assert(mode < 4);
 138
 139     if (w->j_ac_vlc[mode])
 140         return;
 141
 142     table_index       = get_bits(w->gb, 3);
 143     // 2 modes use same tables
 144     w->j_ac_vlc[mode] = &j_ac_vlc[w->quant < 13][mode >> 1][table_index];
 145
 146     assert(w->j_ac_vlc[mode]);
 147 }
 148
 149 static inline int x8_get_orient_vlc(IntraX8Context *w)
 150 {
 151     if (!w->j_orient_vlc) {
 152         int table_index = get_bits(w->gb, 1 + (w->quant < 13));
 153         w->j_orient_vlc = &j_orient_vlc[w->quant < 13][table_index];
 154     }
 155     assert(w->j_orient_vlc);
 156     assert(w->j_orient_vlc->table);
 157
 158     return get_vlc2(w->gb, w->j_orient_vlc->table, OR_VLC_BITS, OR_VLC_MTD);
 159 }
 160
 161 #define extra_bits(eb)  (eb)        // 3 bits
 162 #define extra_run       (0xFF << 8) // 1 bit
 163 #define extra_level     (0x00 << 8) // 1 bit
 164 #define run_offset(r)   ((r) << 16) // 6 bits
 165 #define level_offset(l) ((l) << 24) // 5 bits
 166 static const uint32_t ac_decode_table[] = {
 167     /* 46 */ extra_bits(3) | extra_run   | run_offset(16) | level_offset(0),
 168     /* 47 */ extra_bits(3) | extra_run   | run_offset(24) | level_offset(0),
 169     /* 48 */ extra_bits(2) | extra_run   | run_offset(4)  | level_offset(1),
 170     /* 49 */ extra_bits(3) | extra_run   | run_offset(8)  | level_offset(1),
 171
 172     /* 50 */ extra_bits(5) | extra_run   | run_offset(32) | level_offset(0),
 173     /* 51 */ extra_bits(4) | extra_run   | run_offset(16) | level_offset(1),
 174
 175     /* 52 */ extra_bits(2) | extra_level | run_offset(0)  | level_offset(4),
 176     /* 53 */ extra_bits(2) | extra_level | run_offset(0)  | level_offset(8),
 177     /* 54 */ extra_bits(2) | extra_level | run_offset(0)  | level_offset(12),
 178     /* 55 */ extra_bits(3) | extra_level | run_offset(0)  | level_offset(16),
 179     /* 56 */ extra_bits(3) | extra_level | run_offset(0)  | level_offset(24),
 180
 181     /* 57 */ extra_bits(2) | extra_level | run_offset(1)  | level_offset(3),
 182     /* 58 */ extra_bits(3) | extra_level | run_offset(1)  | level_offset(7),
 183
 184     /* 59 */ extra_bits(2) | extra_run   | run_offset(16) | level_offset(0),
 185     /* 60 */ extra_bits(2) | extra_run   | run_offset(20) | level_offset(0),
 186     /* 61 */ extra_bits(2) | extra_run   | run_offset(24) | level_offset(0),
 187     /* 62 */ extra_bits(2) | extra_run   | run_offset(28) | level_offset(0),
 188     /* 63 */ extra_bits(4) | extra_run   | run_offset(32) | level_offset(0),
 189     /* 64 */ extra_bits(4) | extra_run   | run_offset(48) | level_offset(0),
 190
 191     /* 65 */ extra_bits(2) | extra_run   | run_offset(4)  | level_offset(1),
 192     /* 66 */ extra_bits(3) | extra_run   | run_offset(8)  | level_offset(1),
 193     /* 67 */ extra_bits(4) | extra_run   | run_offset(16) | level_offset(1),
 194
 195     /* 68 */ extra_bits(2) | extra_level | run_offset(0)  | level_offset(4),
 196     /* 69 */ extra_bits(3) | extra_level | run_offset(0)  | level_offset(8),
 197     /* 70 */ extra_bits(4) | extra_level | run_offset(0)  | level_offset(16),
 198
 199     /* 71 */ extra_bits(2) | extra_level | run_offset(1)  | level_offset(3),
 200     /* 72 */ extra_bits(3) | extra_level | run_offset(1)  | level_offset(7),
 201 };
 202 #undef extra_bits
 203 #undef extra_run
 204 #undef extra_level
 205 #undef run_offset
 206 #undef level_offset
 207
 208 static void x8_get_ac_rlf(IntraX8Context *const w, const int mode,
 209                           int *const run, int *const level, int *const final)
 210 {
 211     int i, e;
 212
 213 //    x8_select_ac_table(w, mode);
 214     i = get_vlc2(w->gb, w->j_ac_vlc[mode]->table, AC_VLC_BITS, AC_VLC_MTD);
 215
 216     if (i < 46) { // [0-45]
 217         int t, l;
 218         if (i < 0) {
 219             *level =
 220             *final =      // prevent 'may be used uninitialized'
 221             *run   = 64;  // this would cause error exit in the ac loop
 222             return;
 223         }
 224
 225         /*
 226          * i == 0-15  r = 0-15 l = 0; r = i & %01111
 227          * i == 16-19 r = 0-3  l = 1; r = i & %00011
 228          * i == 20-21 r = 0-1  l = 2; r = i & %00001
 229          * i == 22    r = 0    l = 3; r = i & %00000
 230          */
 231
 232         *final =
 233         t      = i > 22;
 234         i     -= 23 * t;
 235
 236         /* l = lut_l[i / 2] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3 }[i >> 1];
 237          *     11 10'01 01'00 00'00 00'00 00'00 00 => 0xE50000 */
 238         l = (0xE50000 >> (i & 0x1E)) & 3; // 0x1E or ~1 or (i >> 1 << 1)
 239
 240         /* t = lut_mask[l] = { 0x0f, 0x03, 0x01, 0x00 }[l];
 241          *     as i < 256 the higher bits do not matter */
 242         t = 0x01030F >> (l << 3);
 243
 244         *run   = i & t;
 245         *level = l;
 246     } else if (i < 73) { // [46-72]
 247         uint32_t sm;
 248         uint32_t mask;
 249
 250         i -= 46;
 251         sm = ac_decode_table[i];
 252
 253         e    = get_bits(w->gb, sm & 0xF);
 254         sm >>= 8;                               // 3 bits
 255         mask = sm & 0xff;
 256         sm >>= 8;                               // 1 bit
 257
 258         *run   = (sm &  0xff) + (e &  mask);    // 6 bits
 259         *level = (sm >>    8) + (e & ~mask);    // 5 bits
 260         *final = i > (58 - 46);
 261     } else if (i < 75) { // [73-74]
 262         static const uint8_t crazy_mix_runlevel[32] = {
 263             0x22, 0x32, 0x33, 0x53, 0x23, 0x42, 0x43, 0x63,
 264             0x24, 0x52, 0x34, 0x73, 0x25, 0x62, 0x44, 0x83,
 265             0x26, 0x72, 0x35, 0x54, 0x27, 0x82, 0x45, 0x64,
 266             0x28, 0x92, 0x36, 0x74, 0x29, 0xa2, 0x46, 0x84,
 267         };
 268
 269         *final = !(i & 1);
 270         e      = get_bits(w->gb, 5); // get the extra bits
 271         *run   = crazy_mix_runlevel[e] >> 4;
 272         *level = crazy_mix_runlevel[e] & 0x0F;
 273     } else {
 274         *level = get_bits(w->gb, 7 - 3 * (i & 1));
 275         *run   = get_bits(w->gb, 6);
 276         *final = get_bits1(w->gb);
 277     }
 278     return;
 279 }
 280
 281 /* static const uint8_t dc_extra_sbits[] = {
 282  *     0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
 283  * }; */
 284 static const uint8_t dc_index_offset[] = {
 285     0, 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
 286 };
 287
 288 static int x8_get_dc_rlf(IntraX8Context *const w, const int mode,
 289                          int *const level, int *const final)
 290 {
 291     int i, e, c;
 292
 293     assert(mode < 3);
 294     if (!w->j_dc_vlc[mode]) {
 295         int table_index = get_bits(w->gb, 3);
 296         // 4 modes, same table
 297         w->j_dc_vlc[mode] = &j_dc_vlc[w->quant < 13][table_index];
 298     }
 299     assert(w->j_dc_vlc);
 300     assert(w->j_dc_vlc[mode]->table);
 301
 302     i = get_vlc2(w->gb, w->j_dc_vlc[mode]->table, DC_VLC_BITS, DC_VLC_MTD);
 303
 304     /* (i >= 17) { i -= 17; final =1; } */
 305     c      = i > 16;
 306     *final = c;
 307     i      -= 17 * c;
 308
 309     if (i <= 0) {
 310         *level = 0;
 311         return -i;
 312     }
 313     c  = (i + 1) >> 1; // hackish way to calculate dc_extra_sbits[]
 314     c -= c > 1;
 315
 316     e = get_bits(w->gb, c); // get the extra bits
 317     i = dc_index_offset[i] + (e >> 1);
 318
 319     e      = -(e & 1);     // 0, 0xffffff
 320     *level =  (i ^ e) - e; // (i ^ 0) - 0, (i ^ 0xff) - (-1)
 321     return 0;
 322 }
 323
 324 // end of huffman
 325
 326 static int x8_setup_spatial_predictor(IntraX8Context *const w, const int chroma)
 327 {
 328     int range;
 329     int sum;
 330     int quant;
 331
 332     w->dsp.setup_spatial_compensation(w->dest[chroma], w->scratchpad,
 333                                       w->frame->linesize[chroma > 0],
 334                                       &range, &sum, w->edges);
 335     if (chroma) {
 336         w->orient = w->chroma_orient;
 337         quant     = w->quant_dc_chroma;
 338     } else {
 339         quant = w->quant;
 340     }
 341
 342     w->flat_dc = 0;
 343     if (range < quant || range < 3) {
 344         w->orient = 0;
 345
 346         // yep you read right, a +-1 idct error may break decoding!
 347         if (range < 3) {
 348             w->flat_dc      = 1;
 349             sum            += 9;
 350             // ((1 << 17) + 9) / (8 + 8 + 1 + 2) = 6899
 351             w->predicted_dc = sum * 6899 >> 17;
 352         }
 353     }
 354     if (chroma)
 355         return 0;
 356
 357     assert(w->orient < 3);
 358     if (range < 2 * w->quant) {
 359         if ((w->edges & 3) == 0) {
 360             if (w->orient == 1)
 361                 w->orient = 11;
 362             if (w->orient == 2)
 363                 w->orient = 10;
 364         } else {
 365             w->orient = 0;
 366         }
 367         w->raw_orient = 0;
 368     } else {
 369         static const uint8_t prediction_table[3][12] = {
 370             { 0, 8, 4, 10, 11, 2, 6, 9, 1, 3, 5, 7 },
 371             { 4, 0, 8, 11, 10, 3, 5, 2, 6, 9, 1, 7 },
 372             { 8, 0, 4, 10, 11, 1, 7, 2, 6, 9, 3, 5 },
 373         };
 374         w->raw_orient = x8_get_orient_vlc(w);
 375         if (w->raw_orient < 0)
 376             return -1;
 377         assert(w->raw_orient < 12);
 378         assert(w->orient < 3);
 379         w->orient = prediction_table[w->orient][w->raw_orient];
 380     }
 381     return 0;
 382 }
 383
 384 static void x8_update_predictions(IntraX8Context *const w, const int orient,
 385                                   const int est_run)
 386 {
 387     w->prediction_table[w->mb_x * 2 + (w->mb_y & 1)] = (est_run << 2) + 1 * (orient == 4) + 2 * (orient == 8);
 388 /*
 389  * y = 2n + 0 -> // 0 2 4
 390  * y = 2n + 1 -> // 1 3 5
 391  */
 392 }
 393
 394 static void x8_get_prediction_chroma(IntraX8Context *const w)
 395 {
 396     w->edges  = 1 * !(w->mb_x >> 1);
 397     w->edges |= 2 * !(w->mb_y >> 1);
 398     w->edges |= 4 * (w->mb_x >= (2 * w->mb_width - 1)); // mb_x for chroma would always be odd
 399
 400     w->raw_orient = 0;
 401     // lut_co[8] = {inv,4,8,8, inv,4,8,8} <- => {1,1,0,0;1,1,0,0} => 0xCC
 402     if (w->edges & 3) {
 403         w->chroma_orient = 4 << ((0xCC >> w->edges) & 1);
 404         return;
 405     }
 406     // block[x - 1][y | 1 - 1)]
 407     w->chroma_orient = (w->prediction_table[2 * w->mb_x - 2] & 0x03) << 2;
 408 }
 409
 410 static void x8_get_prediction(IntraX8Context *const w)
 411 {
 412     int a, b, c, i;
 413
 414     w->edges  = 1 * !w->mb_x;
 415     w->edges |= 2 * !w->mb_y;
 416     w->edges |= 4 * (w->mb_x >= (2 * w->mb_width - 1));
 417
 418     switch (w->edges & 3) {
 419     case 0:
 420         break;
 421     case 1:
 422         // take the one from the above block[0][y - 1]
 423         w->est_run = w->prediction_table[!(w->mb_y & 1)] >> 2;
 424         w->orient  = 1;
 425         return;
 426     case 2:
 427         // take the one from the previous block[x - 1][0]
 428         w->est_run = w->prediction_table[2 * w->mb_x - 2] >> 2;
 429         w->orient  = 2;
 430         return;
 431     case 3:
 432         w->est_run = 16;
 433         w->orient  = 0;
 434         return;
 435     }
 436     // no edge cases
 437     b = w->prediction_table[2 * w->mb_x     + !(w->mb_y & 1)]; // block[x    ][y - 1]
 438     a = w->prediction_table[2 * w->mb_x - 2 +  (w->mb_y & 1)]; // block[x - 1][y    ]
 439     c = w->prediction_table[2 * w->mb_x - 2 + !(w->mb_y & 1)]; // block[x - 1][y - 1]
 440
 441     w->est_run = FFMIN(b, a);
 442     /* This condition has nothing to do with w->edges, even if it looks
 443      * similar it would trigger if e.g. x = 3; y = 2;
 444      * I guess somebody wrote something wrong and it became standard. */
 445     if ((w->mb_x & w->mb_y) != 0)
 446         w->est_run = FFMIN(c, w->est_run);
 447     w->est_run >>= 2;
 448
 449     a &= 3;
 450     b &= 3;
 451     c &= 3;
 452
 453     i = (0xFFEAF4C4 >> (2 * b + 8 * a)) & 3;
 454     if (i != 3)
 455         w->orient = i;
 456     else
 457         w->orient = (0xFFEAD8 >> (2 * c + 8 * (w->quant > 12))) & 3;
 458 /*
 459  * lut1[b][a] = {
 460  * ->{ 0, 1, 0, pad },
 461  *   { 0, 1, X, pad },
 462  *   { 2, 2, 2, pad }
 463  * }
 464  * pad 2  2  2;
 465  * pad X  1  0;
 466  * pad 0  1  0 <-
 467  * -> 11 10 '10 10 '11 11'01 00 '11 00'01 00 => 0xEAF4C4
 468  *
 469  * lut2[q>12][c] = {
 470  * ->{ 0, 2, 1, pad},
 471  *   { 2, 2, 2, pad}
 472  * }
 473  * pad 2  2  2;
 474  * pad 1  2  0 <-
 475  * -> 11 10'10 10 '11 01'10 00 => 0xEAD8
 476  */
 477 }
 478
 479 static void x8_ac_compensation(IntraX8Context *const w, const int direction,
 480                                const int dc_level)
 481 {
 482     int t;
 483 #define B(x, y) w->block[0][w->idsp.idct_permutation[(x) + (y) * 8]]
 484 #define T(x)  ((x) * dc_level + 0x8000) >> 16;
 485     switch (direction) {
 486     case 0:
 487         t        = T(3811); // h
 488         B(1, 0) -= t;
 489         B(0, 1) -= t;
 490
 491         t        = T(487); // e
 492         B(2, 0) -= t;
 493         B(0, 2) -= t;
 494
 495         t        = T(506); // f
 496         B(3, 0) -= t;
 497         B(0, 3) -= t;
 498
 499         t        = T(135); // c
 500         B(4, 0) -= t;
 501         B(0, 4) -= t;
 502         B(2, 1) += t;
 503         B(1, 2) += t;
 504         B(3, 1) += t;
 505         B(1, 3) += t;
 506
 507         t        = T(173); // d
 508         B(5, 0) -= t;
 509         B(0, 5) -= t;
 510
 511         t        = T(61); // b
 512         B(6, 0) -= t;
 513         B(0, 6) -= t;
 514         B(5, 1) += t;
 515         B(1, 5) += t;
 516
 517         t        = T(42); // a
 518         B(7, 0) -= t;
 519         B(0, 7) -= t;
 520         B(4, 1) += t;
 521         B(1, 4) += t;
 522         B(4, 4) += t;
 523
 524         t        = T(1084); // g
 525         B(1, 1) += t;
 526
 527         w->block_last_index[0] = FFMAX(w->block_last_index[0], 7 * 8);
 528         break;
 529     case 1:
 530         B(0, 1) -= T(6269);
 531         B(0, 3) -= T(708);
 532         B(0, 5) -= T(172);
 533         B(0, 7) -= T(73);
 534
 535         w->block_last_index[0] = FFMAX(w->block_last_index[0], 7 * 8);
 536         break;
 537     case 2:
 538         B(1, 0) -= T(6269);
 539         B(3, 0) -= T(708);
 540         B(5, 0) -= T(172);
 541         B(7, 0) -= T(73);
 542
 543         w->block_last_index[0] = FFMAX(w->block_last_index[0], 7);
 544         break;
 545     }
 546 #undef B
 547 #undef T
 548 }
 549
 550 static void dsp_x8_put_solidcolor(const uint8_t pix, uint8_t *dst,
 551                                   const ptrdiff_t linesize)
 552 {
 553     int k;
 554     for (k = 0; k < 8; k++) {
 555         memset(dst, pix, 8);
 556         dst += linesize;
 557     }
 558 }
 559
 560 static const int16_t quant_table[64] = {
 561     256, 256, 256, 256, 256, 256, 259, 262,
 562     265, 269, 272, 275, 278, 282, 285, 288,
 563     292, 295, 299, 303, 306, 310, 314, 317,
 564     321, 325, 329, 333, 337, 341, 345, 349,
 565     353, 358, 362, 366, 371, 375, 379, 384,
 566     389, 393, 398, 403, 408, 413, 417, 422,
 567     428, 433, 438, 443, 448, 454, 459, 465,
 568     470, 476, 482, 488, 493, 499, 505, 511,
 569 };
 570
 571 static int x8_decode_intra_mb(IntraX8Context *const w, const int chroma)
 572 {
 573     uint8_t *scantable;
 574     int final, run, level;
 575     int ac_mode, dc_mode, est_run, dc_level;
 576     int pos, n;
 577     int zeros_only;
 578     int use_quant_matrix;
 579     int sign;
 580
 581     assert(w->orient < 12);
 582     w->bdsp.clear_block(w->block[0]);
 583
 584     if (chroma)
 585         dc_mode = 2;
 586     else
 587         dc_mode = !!w->est_run; // 0, 1
 588
 589     if (x8_get_dc_rlf(w, dc_mode, &dc_level, &final))
 590         return -1;
 591     n          = 0;
 592     zeros_only = 0;
 593     if (!final) { // decode ac
 594         use_quant_matrix = w->use_quant_matrix;
 595         if (chroma) {
 596             ac_mode = 1;
 597             est_run = 64; // not used
 598         } else {
 599             if (w->raw_orient < 3)
 600                 use_quant_matrix = 0;
 601
 602             if (w->raw_orient > 4) {
 603                 ac_mode = 0;
 604                 est_run = 64;
 605             } else {
 606                 if (w->est_run > 1) {
 607                     ac_mode = 2;
 608                     est_run = w->est_run;
 609                 } else {
 610                     ac_mode = 3;
 611                     est_run = 64;
 612                 }
 613             }
 614         }
 615         x8_select_ac_table(w, ac_mode);
 616         /* scantable_selector[12] = { 0, 2, 0, 1, 1, 1, 0, 2, 2, 0, 1, 2 }; <-
 617          * -> 10'01' 00'10' 10'00' 01'01' 01'00' 10'00 => 0x928548 */
 618         scantable = w->scantable[(0x928548 >> (2 * w->orient)) & 3].permutated;
 619         pos       = 0;
 620         do {
 621             n++;
 622             if (n >= est_run) {
 623                 ac_mode = 3;
 624                 x8_select_ac_table(w, 3);
 625             }
 626
 627             x8_get_ac_rlf(w, ac_mode, &run, &level, &final);
 628
 629             pos += run + 1;
 630             if (pos > 63) {
 631                 // this also handles vlc error in x8_get_ac_rlf
 632                 return -1;
 633             }
 634             level  = (level + 1) * w->dquant;
 635             level += w->qsum;
 636
 637             sign  = -get_bits1(w->gb);
 638             level = (level ^ sign) - sign;
 639
 640             if (use_quant_matrix)
 641                 level = (level * quant_table[pos]) >> 8;
 642
 643             w->block[0][scantable[pos]] = level;
 644         } while (!final);
 645
 646         w->block_last_index[0] = pos;
 647     } else { // DC only
 648         w->block_last_index[0] = 0;
 649         if (w->flat_dc && ((unsigned) (dc_level + 1)) < 3) { // [-1; 1]
 650             int32_t divide_quant = !chroma ? w->divide_quant_dc_luma
 651                                            : w->divide_quant_dc_chroma;
 652             int32_t dc_quant     = !chroma ? w->quant
 653                                            : w->quant_dc_chroma;
 654
 655             // original intent dc_level += predicted_dc/quant;
 656             // but it got lost somewhere in the rounding
 657             dc_level += (w->predicted_dc * divide_quant + (1 << 12)) >> 13;
 658
 659             dsp_x8_put_solidcolor(av_clip_uint8((dc_level * dc_quant + 4) >> 3),
 660                                   w->dest[chroma],
 661                                   w->frame->linesize[!!chroma]);
 662
 663             goto block_placed;
 664         }
 665         zeros_only = dc_level == 0;
 666     }
 667     if (!chroma)
 668         w->block[0][0] = dc_level * w->quant;
 669     else
 670         w->block[0][0] = dc_level * w->quant_dc_chroma;
 671
 672     // there is !zero_only check in the original, but dc_level check is enough
 673     if ((unsigned int) (dc_level + 1) >= 3 && (w->edges & 3) != 3) {
 674         int direction;
 675         /* ac_comp_direction[orient] = { 0, 3, 3, 1, 1, 0, 0, 0, 2, 2, 2, 1 }; <-
 676          * -> 01'10' 10'10' 00'00' 00'01' 01'11' 11'00 => 0x6A017C */
 677         direction = (0x6A017C >> (w->orient * 2)) & 3;
 678         if (direction != 3) {
 679             // modify block_last[]
 680             x8_ac_compensation(w, direction, w->block[0][0]);
 681         }
 682     }
 683
 684     if (w->flat_dc) {
 685         dsp_x8_put_solidcolor(w->predicted_dc, w->dest[chroma],
 686                               w->frame->linesize[!!chroma]);
 687     } else {
 688         w->dsp.spatial_compensation[w->orient](w->scratchpad,
 689                                                w->dest[chroma],
 690                                                w->frame->linesize[!!chroma]);
 691     }
 692     if (!zeros_only)
 693         w->idsp.idct_add(w->dest[chroma],
 694                          w->frame->linesize[!!chroma],
 695                          w->block[0]);
 696
 697 block_placed:
 698     if (!chroma)
 699         x8_update_predictions(w, w->orient, n);
 700
 701     if (w->loopfilter) {
 702         uint8_t *ptr = w->dest[chroma];
 703         ptrdiff_t linesize = w->frame->linesize[!!chroma];
 704
 705         if (!((w->edges & 2) || (zeros_only && (w->orient | 4) == 4)))
 706             w->dsp.h_loop_filter(ptr, linesize, w->quant);
 707
 708         if (!((w->edges & 1) || (zeros_only && (w->orient | 8) == 8)))
 709             w->dsp.v_loop_filter(ptr, linesize, w->quant);
 710     }
 711     return 0;
 712 }
 713
 714 // FIXME maybe merge with ff_*
 715 static void x8_init_block_index(IntraX8Context *w, AVFrame *frame)
 716 {
 717     // not parent codec linesize as this would be wrong for field pics
 718     // not that IntraX8 has interlacing support ;)
 719     const ptrdiff_t linesize   = frame->linesize[0];
 720     const ptrdiff_t uvlinesize = frame->linesize[1];
 721
 722     w->dest[0] = frame->data[0];
 723     w->dest[1] = frame->data[1];
 724     w->dest[2] = frame->data[2];
 725
 726     w->dest[0] +=  w->mb_y       * linesize   << 3;
 727     // chroma blocks are on add rows
 728     w->dest[1] += (w->mb_y & ~1) * uvlinesize << 2;
 729     w->dest[2] += (w->mb_y & ~1) * uvlinesize << 2;
 730 }
 731
 732 av_cold int ff_intrax8_common_init(AVCodecContext *avctx,
 733                                    IntraX8Context *w, IDCTDSPContext *idsp,
 734                                    int16_t (*block)[64],
 735                                    int block_last_index[12],
 736                                    int mb_width, int mb_height)
 737 {
 738     int ret = x8_vlc_init();
 739     if (ret < 0)
 740         return ret;
 741
 742     w->avctx = avctx;
 743     w->idsp = *idsp;
 744     w->mb_width  = mb_width;
 745     w->mb_height = mb_height;
 746     w->block = block;
 747     w->block_last_index = block_last_index;
 748
 749     // two rows, 2 blocks per cannon mb
 750     w->prediction_table = av_mallocz(w->mb_width * 2 * 2);
 751     if (!w->prediction_table)
 752         return AVERROR(ENOMEM);
 753
 754     ff_init_scantable(w->idsp.idct_permutation, &w->scantable[0],
 755                       ff_wmv1_scantable[0]);
 756     ff_init_scantable(w->idsp.idct_permutation, &w->scantable[1],
 757                       ff_wmv1_scantable[2]);
 758     ff_init_scantable(w->idsp.idct_permutation, &w->scantable[2],
 759                       ff_wmv1_scantable[3]);
 760
 761     ff_intrax8dsp_init(&w->dsp);
 762     ff_blockdsp_init(&w->bdsp);
 763
 764     return 0;
 765 }
 766
 767 av_cold void ff_intrax8_common_end(IntraX8Context *w)
 768 {
 769     av_freep(&w->prediction_table);
 770 }
 771
 772 int ff_intrax8_decode_picture(IntraX8Context *w, Picture *pict,
 773                               GetBitContext *gb, int *mb_x, int *mb_y,
 774                               int dquant, int quant_offset,
 775                               int loopfilter, int lowdelay)
 776 {
 777     int mb_xy;
 778
 779     w->gb     = gb;
 780     w->dquant = dquant;
 781     w->quant  = dquant >> 1;
 782     w->qsum   = quant_offset;
 783     w->frame  = pict->f;
 784     w->loopfilter = loopfilter;
 785     w->use_quant_matrix = get_bits1(w->gb);
 786
 787     w->mb_x = *mb_x;
 788     w->mb_y = *mb_y;
 789
 790     w->divide_quant_dc_luma = ((1 << 16) + (w->quant >> 1)) / w->quant;
 791     if (w->quant < 5) {
 792         w->quant_dc_chroma        = w->quant;
 793         w->divide_quant_dc_chroma = w->divide_quant_dc_luma;
 794     } else {
 795         w->quant_dc_chroma        = w->quant + ((w->quant + 3) >> 3);
 796         w->divide_quant_dc_chroma = ((1 << 16) + (w->quant_dc_chroma >> 1)) / w->quant_dc_chroma;
 797     }
 798     x8_reset_vlc_tables(w);
 799
 800     for (w->mb_y = 0; w->mb_y < w->mb_height * 2; w->mb_y++) {
 801         x8_init_block_index(w, w->frame);
 802         mb_xy = (w->mb_y >> 1) * (w->mb_width + 1);
 803         for (w->mb_x = 0; w->mb_x < w->mb_width * 2; w->mb_x++) {
 804             x8_get_prediction(w);
 805             if (x8_setup_spatial_predictor(w, 0))
 806                 goto error;
 807             if (x8_decode_intra_mb(w, 0))
 808                 goto error;
 809
 810             if (w->mb_x & w->mb_y & 1) {
 811                 x8_get_prediction_chroma(w);
 812
 813                 /* when setting up chroma, no vlc is read,
 814                  * so no error condition can be reached */
 815                 x8_setup_spatial_predictor(w, 1);
 816                 if (x8_decode_intra_mb(w, 1))
 817                     goto error;
 818
 819                 x8_setup_spatial_predictor(w, 2);
 820                 if (x8_decode_intra_mb(w, 2))
 821                     goto error;
 822
 823                 w->dest[1] += 8;
 824                 w->dest[2] += 8;
 825
 826                 pict->qscale_table[mb_xy] = w->quant;
 827                 mb_xy++;
 828             }
 829             w->dest[0] += 8;
 830         }
 831         if (w->mb_y & 1)
 832             ff_draw_horiz_band(w->avctx, w->frame, w->frame,
 833                                (w->mb_y - 1) * 8, 16,
 834                                PICT_FRAME, 0, lowdelay);
 835     }
 836
 837 error:
 838     *mb_x = w->mb_x;
 839     *mb_y = w->mb_y;
 840
 841     return 0;
 842 }