quicktime/ffmpeg/libavcodec/svq1.c

   1 /*
   2  *
   3  * Copyright (C) 2002 the xine project
   4  * Copyright (C) 2002 the ffmpeg project
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  *
  20  * (SVQ1 Decoder)
  21  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
  22  * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
  23  *
  24  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  25  */
  26
  27 /**
  28  * @file svq1.c
  29  * Sorenson Vector Quantizer #1 (SVQ1) video codec.
 * For more information on the SVQ1 algorithm, visit:
  31  *   http://www.pcisys.net/~melanson/codecs/
  32  */
  33
  34
  35 //#define DEBUG_SVQ1
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <unistd.h>
  40 #include <limits.h>
  41
  42 #include "common.h"
  43 #include "avcodec.h"
  44 #include "dsputil.h"
  45 #include "mpegvideo.h"
  46 #include "bswap.h"
  47
  48 #undef NDEBUG
  49 #include <assert.h>
  50
/* VLC code/length pairs defined in another translation unit; used by
 * svq1_decode_init() to build svq1_motion_component. */
extern const uint8_t mvtab[33][2];

/* VLC lookup tables. They are file-scope (not per-context) and are filled
 * in by svq1_decode_init(). */
static VLC svq1_block_type;           /* delta-frame block type */
static VLC svq1_motion_component;     /* one motion vector component (magnitude) */
static VLC svq1_intra_multistage[6];  /* stage count per level, intra vectors */
static VLC svq1_inter_multistage[6];  /* stage count per level, inter vectors */
static VLC svq1_intra_mean;           /* mean value, intra vectors */
static VLC svq1_inter_mean;           /* mean value (+256 bias), inter vectors */

/* Block types decoded with svq1_block_type in svq1_decode_delta_block(). */
#define SVQ1_BLOCK_SKIP         0
#define SVQ1_BLOCK_INTER        1
#define SVQ1_BLOCK_INTER_4V     2
#define SVQ1_BLOCK_INTRA        3
  64
/**
 * Context used by the encoder-side functions in this file
 * (svq1_write_header(), encode_block()). The decoder functions visible
 * here use a plain MpegEncContext as their private data instead.
 */
typedef struct SVQ1Context {
    /* Needed for motion estimation only and should not be used for anything
     * else. The idea is to make motion estimation independent of
     * MpegEncContext eventually, at which point this member will be removed
     * (FIXME/XXX). */
    MpegEncContext m;
    AVCodecContext *avctx;
    DSPContext dsp;
    AVFrame picture;
    AVFrame current_picture;
    AVFrame last_picture;
    PutBitContext pb;       /* output bitstream; written by svq1_write_header() */
    GetBitContext gb;

    /* Scratch bit writers required by the breadth-first bitstream order,
     * which makes everything slower and more complex.
     * NOTE(review): presumably one writer per block level (0..5) - confirm. */
    PutBitContext reorder_pb[6];

    /* Frame dimensions as written into the I-frame header. */
    int frame_width;
    int frame_height;

    /* Y plane block dimensions */
    int y_block_width;
    int y_block_height;

    /* U & V plane (C planes) block dimensions */
    int c_block_width;
    int c_block_height;

    uint16_t *mb_type;
    uint32_t *dummy;
    int16_t (*motion_val8[3])[2];
    int16_t (*motion_val16[3])[2];

    int64_t rd_total;
} SVQ1Context;
  95
/* Motion vector, also used as a motion vector predictor. The decoder uses
 * bit 0 of each component to select the put_pixels variant and the
 * remaining bits (value >> 1) as the whole-pixel displacement. */
typedef struct svq1_pmv_s {
  int            x;
  int            y;
} svq1_pmv_t;
 101
 102 #include "svq1_cb.h"
 103 #include "svq1_vlc.h"
 104
/* Byte-indexed lookup table used by svq1_packet_checksum().
 * NOTE(review): entry 1 is 0x1021, which looks like a CRC-16/CCITT
 * (polynomial 0x1021) table - confirm before relying on that. */
static const uint16_t checksum_table[256] = {
  0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
  0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
  0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
  0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
  0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
  0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
  0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
  0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
  0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
  0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
  0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
  0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
  0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
  0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
  0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
  0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
  0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
  0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
  0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
  0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
  0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
  0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
  0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
  0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
  0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
  0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
  0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
  0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
  0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
  0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
  0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
  0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
 139
/* Byte substitution table used by svq1_parse_string() to derive the next
 * XOR seed while de-obfuscating the embedded message string. */
static const uint8_t string_table[256] = {
  0x00, 0xD5, 0x7F, 0xAA, 0xFE, 0x2B, 0x81, 0x54,
  0x29, 0xFC, 0x56, 0x83, 0xD7, 0x02, 0xA8, 0x7D,
  0x52, 0x87, 0x2D, 0xF8, 0xAC, 0x79, 0xD3, 0x06,
  0x7B, 0xAE, 0x04, 0xD1, 0x85, 0x50, 0xFA, 0x2F,
  0xA4, 0x71, 0xDB, 0x0E, 0x5A, 0x8F, 0x25, 0xF0,
  0x8D, 0x58, 0xF2, 0x27, 0x73, 0xA6, 0x0C, 0xD9,
  0xF6, 0x23, 0x89, 0x5C, 0x08, 0xDD, 0x77, 0xA2,
  0xDF, 0x0A, 0xA0, 0x75, 0x21, 0xF4, 0x5E, 0x8B,
  0x9D, 0x48, 0xE2, 0x37, 0x63, 0xB6, 0x1C, 0xC9,
  0xB4, 0x61, 0xCB, 0x1E, 0x4A, 0x9F, 0x35, 0xE0,
  0xCF, 0x1A, 0xB0, 0x65, 0x31, 0xE4, 0x4E, 0x9B,
  0xE6, 0x33, 0x99, 0x4C, 0x18, 0xCD, 0x67, 0xB2,
  0x39, 0xEC, 0x46, 0x93, 0xC7, 0x12, 0xB8, 0x6D,
  0x10, 0xC5, 0x6F, 0xBA, 0xEE, 0x3B, 0x91, 0x44,
  0x6B, 0xBE, 0x14, 0xC1, 0x95, 0x40, 0xEA, 0x3F,
  0x42, 0x97, 0x3D, 0xE8, 0xBC, 0x69, 0xC3, 0x16,
  0xEF, 0x3A, 0x90, 0x45, 0x11, 0xC4, 0x6E, 0xBB,
  0xC6, 0x13, 0xB9, 0x6C, 0x38, 0xED, 0x47, 0x92,
  0xBD, 0x68, 0xC2, 0x17, 0x43, 0x96, 0x3C, 0xE9,
  0x94, 0x41, 0xEB, 0x3E, 0x6A, 0xBF, 0x15, 0xC0,
  0x4B, 0x9E, 0x34, 0xE1, 0xB5, 0x60, 0xCA, 0x1F,
  0x62, 0xB7, 0x1D, 0xC8, 0x9C, 0x49, 0xE3, 0x36,
  0x19, 0xCC, 0x66, 0xB3, 0xE7, 0x32, 0x98, 0x4D,
  0x30, 0xE5, 0x4F, 0x9A, 0xCE, 0x1B, 0xB1, 0x64,
  0x72, 0xA7, 0x0D, 0xD8, 0x8C, 0x59, 0xF3, 0x26,
  0x5B, 0x8E, 0x24, 0xF1, 0xA5, 0x70, 0xDA, 0x0F,
  0x20, 0xF5, 0x5F, 0x8A, 0xDE, 0x0B, 0xA1, 0x74,
  0x09, 0xDC, 0x76, 0xA3, 0xF7, 0x22, 0x88, 0x5D,
  0xD6, 0x03, 0xA9, 0x7C, 0x28, 0xFD, 0x57, 0x82,
  0xFF, 0x2A, 0x80, 0x55, 0x01, 0xD4, 0x7E, 0xAB,
  0x84, 0x51, 0xFB, 0x2E, 0x7A, 0xAF, 0x05, 0xD0,
  0xAD, 0x78, 0xD2, 0x07, 0x53, 0x86, 0x2C, 0xF9
};
 174
/*
 * Reads the split flags for the breadth-first vector list.
 * Operates on the caller's locals: bitbuf, list, pitch, i, m, n and level.
 *
 * list[i] is the vector currently being examined, m is the index at which
 * the current level ends and n is the next free slot in list. While level
 * is above 0, one bit is read per vector: a 0 bit stops the scan (list[i]
 * is then decoded by the caller at the current level), a 1 bit appends two
 * children. The second child is offset vertically (by pitch) for odd
 * levels and horizontally for even levels, by 1 << (level/2 + 1) units.
 * When i reaches m the level is decremented, and the scan stops once
 * level hits 0.
 */
#define SVQ1_PROCESS_VECTOR()\
    for (; level > 0; i++) {\
      /* process next depth */\
      if (i == m) {\
        m = n;\
        if (--level == 0)\
          break;\
      }\
      /* divide block if next bit set */\
      if (get_bits (bitbuf, 1) == 0)\
        break;\
      /* add child nodes */\
      list[n++] = list[i];\
      list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
    }
 190
/*
 * Adds the selected codebook entries to four pixels at once and clamps
 * the result. Operates on the caller's locals: codebook, entries, stages,
 * j, n1, n2 and n3.
 *
 * n1 and n2 each hold two pixel values in separate 16-bit lanes (n1 the
 * bytes selected by mask 0xFF00FF00, n2 those selected by 0x00FF00FF).
 * Each codebook word is XORed with 0x80808080 before being added, which
 * adds a bias of 128 per stage to every byte; SVQ1_CALC_CODEBOOK_ENTRIES
 * removes that bias from the mean beforehand. If any lane left the
 * 0..255 range, the second half clamps each lane to [0..255].
 */
#define SVQ1_ADD_CODEBOOK()\
          /* add codebook entries to vector */\
          for (j=0; j < stages; j++) {\
            n3  = codebook[entries[j]] ^ 0x80808080;\
            n1 += ((n3 & 0xFF00FF00) >> 8);\
            n2 +=  (n3 & 0x00FF00FF);\
          }\
\
          /* clip to [0..255] */\
          if (n1 & 0xFF00FF00) {\
            n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n1 += 0x7F007F00;\
            n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n1 &= (n3 & 0x00FF00FF);\
          }\
\
          if (n2 & 0xFF00FF00) {\
            n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n2 += 0x7F007F00;\
            n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n2 &= (n3 & 0x00FF00FF);\
          }
 213
/*
 * Reconstructs an intra vector: every group of four pixels starts from
 * the packed mean n4, gets the codebook entries added by
 * SVQ1_ADD_CODEBOOK and is stored as one 32-bit word. Operates on the
 * caller's locals dst, codebook, width, height, pitch, x, y, n1, n2, n4;
 * dst and codebook are advanced as a side effect.
 */
#define SVQ1_DO_CODEBOOK_INTRA()\
      for (y=0; y < height; y++) {\
        for (x=0; x < (width / 4); x++, codebook++) {\
        n1 = n4;\
        n2 = n4;\
        SVQ1_ADD_CODEBOOK()\
        /* store result */\
        dst[x] = (n1 << 8) | n2;\
        }\
        dst += (pitch / 4);\
      }
 225
/*
 * Reconstructs an inter (residual) vector: like SVQ1_DO_CODEBOOK_INTRA,
 * but the mean and codebook entries are added on top of the pixels that
 * are already present in dst (the motion-compensated prediction written
 * by the caller). dst and codebook are advanced as a side effect.
 */
#define SVQ1_DO_CODEBOOK_NONINTRA()\
      for (y=0; y < height; y++) {\
        for (x=0; x < (width / 4); x++, codebook++) {\
        n3 = dst[x];\
        /* add mean value to vector */\
        n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
        n2 =  (n3 & 0x00FF00FF)   + n4;\
        SVQ1_ADD_CODEBOOK()\
        /* store result */\
        dst[x] = (n1 << 8) | n2;\
        }\
        dst += (pitch / 4);\
      }
 239
/*
 * Prepares codebook lookups for one vector. Operates on the caller's
 * locals: bitbuf, level, stages, entries, bit_cache, codebook, mean, n4, j.
 *
 * Reads 4 bits per stage (first stage in the most significant nibble) and
 * converts each nibble into an offset, in 32-bit words, into the level's
 * codebook: 16 entries per stage, (1 << (level + 1)) words per entry.
 * The mean is lowered by 128 per stage to cancel the bias introduced by
 * the "^ 0x80808080" in SVQ1_ADD_CODEBOOK, then replicated into both
 * 16-bit lanes of n4; the (mean >> 31) term compensates the upper lane
 * when mean is negative.
 */
#define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
      codebook = (const uint32_t *) cbook[level];\
      bit_cache = get_bits (bitbuf, 4*stages);\
      /* calculate codebook entries for this vector */\
      for (j=0; j < stages; j++) {\
        entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
      }\
      mean -= (stages * 128);\
      n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
 249
 250 static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 251   uint32_t    bit_cache;
 252   uint8_t    *list[63];
 253   uint32_t   *dst;
 254   const uint32_t *codebook;
 255   int         entries[6];
 256   int         i, j, m, n;
 257   int         mean, stages;
 258   unsigned    x, y, width, height, level;
 259   uint32_t    n1, n2, n3, n4;
 260
 261   /* initialize list for breadth first processing of vectors */
 262   list[0] = pixels;
 263
 264   /* recursively process vector */
 265   for (i=0, m=1, n=1, level=5; i < n; i++) {
 266     SVQ1_PROCESS_VECTOR();
 267
 268     /* destination address and vector size */
 269     dst = (uint32_t *) list[i];
 270     width = 1 << ((4 + level) /2);
 271     height = 1 << ((3 + level) /2);
 272
 273     /* get number of stages (-1 skips vector, 0 for mean only) */
 274     stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
 275
 276     if (stages == -1) {
 277         for (y=0; y < height; y++) {
 278           memset (&dst[y*(pitch / 4)], 0, width);
 279         }
 280       continue;         /* skip vector */
 281     }
 282
 283     if ((stages > 0) && (level >= 4)) {
 284 #ifdef DEBUG_SVQ1
 285     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
 286 #endif
 287       return -1;        /* invalid vector */
 288     }
 289
 290     mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
 291
 292     if (stages == 0) {
 293       for (y=0; y < height; y++) {
 294         memset (&dst[y*(pitch / 4)], mean, width);
 295       }
 296     } else {
 297       SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
 298       SVQ1_DO_CODEBOOK_INTRA()
 299     }
 300   }
 301
 302   return 0;
 303 }
 304
 305 static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 306   uint32_t    bit_cache;
 307   uint8_t    *list[63];
 308   uint32_t   *dst;
 309   const uint32_t *codebook;
 310   int         entries[6];
 311   int         i, j, m, n;
 312   int         mean, stages;
 313   int         x, y, width, height, level;
 314   uint32_t    n1, n2, n3, n4;
 315
 316   /* initialize list for breadth first processing of vectors */
 317   list[0] = pixels;
 318
 319   /* recursively process vector */
 320   for (i=0, m=1, n=1, level=5; i < n; i++) {
 321     SVQ1_PROCESS_VECTOR();
 322
 323     /* destination address and vector size */
 324     dst = (uint32_t *) list[i];
 325     width = 1 << ((4 + level) /2);
 326     height = 1 << ((3 + level) /2);
 327
 328     /* get number of stages (-1 skips vector, 0 for mean only) */
 329     stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
 330
 331     if (stages == -1) continue; /* skip vector */
 332
 333     if ((stages > 0) && (level >= 4)) {
 334 #ifdef DEBUG_SVQ1
 335     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
 336 #endif
 337       return -1;        /* invalid vector */
 338     }
 339
 340     mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
 341
 342     SVQ1_CALC_CODEBOOK_ENTRIES(svq1_inter_codebooks);
 343     SVQ1_DO_CODEBOOK_NONINTRA()
 344   }
 345   return 0;
 346 }
 347
 348 static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
 349   int         diff;
 350   int         i;
 351
 352   for (i=0; i < 2; i++) {
 353
 354     /* get motion code */
 355     diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
 356     if(diff<0)
 357         return -1;
 358     else if(diff){
 359         if(get_bits1(bitbuf)) diff= -diff;
 360     }
 361
 362     /* add median of motion vector predictors and clip result */
 363     if (i == 1)
 364       mv->y = ((diff + mid_pred(pmv[0]->y, pmv[1]->y, pmv[2]->y)) << 26) >> 26;
 365     else
 366       mv->x = ((diff + mid_pred(pmv[0]->x, pmv[1]->x, pmv[2]->x)) << 26) >> 26;
 367   }
 368
 369   return 0;
 370 }
 371
 372 static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
 373   uint8_t *src;
 374   uint8_t *dst;
 375   int      i;
 376
 377   src = &previous[x + y*pitch];
 378   dst = current;
 379
 380   for (i=0; i < 16; i++) {
 381     memcpy (dst, src, 16);
 382     src += pitch;
 383     dst += pitch;
 384   }
 385 }
 386
 387 static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
 388                                uint8_t *current, uint8_t *previous, int pitch,
 389                                svq1_pmv_t *motion, int x, int y) {
 390   uint8_t    *src;
 391   uint8_t    *dst;
 392   svq1_pmv_t  mv;
 393   svq1_pmv_t *pmv[3];
 394   int         result;
 395
 396   /* predict and decode motion vector */
 397   pmv[0] = &motion[0];
 398   if (y == 0) {
 399     pmv[1] =
 400     pmv[2] = pmv[0];
 401   }
 402   else {
 403     pmv[1] = &motion[(x / 8) + 2];
 404     pmv[2] = &motion[(x / 8) + 4];
 405   }
 406
 407   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 408
 409   if (result != 0)
 410     return result;
 411
 412   motion[0].x           =
 413   motion[(x / 8) + 2].x =
 414   motion[(x / 8) + 3].x = mv.x;
 415   motion[0].y           =
 416   motion[(x / 8) + 2].y =
 417   motion[(x / 8) + 3].y = mv.y;
 418
 419   if(y + (mv.y >> 1)<0)
 420      mv.y= 0;
 421   if(x + (mv.x >> 1)<0)
 422      mv.x= 0;
 423
 424 #if 0
 425   int w= (s->width+15)&~15;
 426   int h= (s->height+15)&~15;
 427   if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
 428       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
 429 #endif
 430
 431   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
 432   dst = current;
 433
 434   s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
 435
 436   return 0;
 437 }
 438
/**
 * Decodes the four motion vectors of an INTER_4V block and writes the
 * four motion-compensated 8x8 predictions into the current picture.
 *
 * The vectors are decoded in the order top-left, top-right, bottom-left,
 * bottom-right. Each one is predicted from a mix of already decoded
 * vectors of this block and entries of the motion array, so the order of
 * the statements below matters. After decoding, pmv[0..3] point at the
 * four vectors (mv, motion[0], motion[x/8 + 2], motion[x/8 + 3]).
 *
 * @return 0 on success, otherwise the error of svq1_decode_motion_vector()
 */
static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
                                  uint8_t *current, uint8_t *previous, int pitch,
                                  svq1_pmv_t *motion,int x, int y) {
  uint8_t    *src;
  uint8_t    *dst;
  svq1_pmv_t  mv;
  svq1_pmv_t *pmv[4];
  int         i, result;

  /* predict and decode motion vector (0) */
  pmv[0] = &motion[0];
  if (y == 0) {
    pmv[1] =
    pmv[2] = pmv[0];
  }
  else {
    pmv[1] = &motion[(x / 8) + 2];
    pmv[2] = &motion[(x / 8) + 4];
  }

  result = svq1_decode_motion_vector (bitbuf, &mv, pmv);

  if (result != 0)
    return result;

  /* predict and decode motion vector (1) */
  pmv[0] = &mv;
  if (y == 0) {
    pmv[1] =
    pmv[2] = pmv[0];
  }
  else {
    /* pmv[2] still points at motion[(x / 8) + 4] from above */
    pmv[1] = &motion[(x / 8) + 3];
  }
  result = svq1_decode_motion_vector (bitbuf, &motion[0], pmv);

  if (result != 0)
    return result;

  /* predict and decode motion vector (2) */
  pmv[1] = &motion[0];
  pmv[2] = &motion[(x / 8) + 1];

  result = svq1_decode_motion_vector (bitbuf, &motion[(x / 8) + 2], pmv);

  if (result != 0)
    return result;

  /* predict and decode motion vector (3) */
  pmv[2] = &motion[(x / 8) + 2];
  pmv[3] = &motion[(x / 8) + 3];

  result = svq1_decode_motion_vector (bitbuf, pmv[3], pmv);

  if (result != 0)
    return result;

  /* form predictions */
  for (i=0; i < 4; i++) {
    /* add the sub-block position (8 pixels, in half-pixel units) to the vector */
    int mvx= pmv[i]->x + (i&1)*16;
    int mvy= pmv[i]->y + (i>>1)*16;

    /* XXX/FIXME: clipping or padding? Only sources above or left of the
     * reference plane are handled here. */
    if(y + (mvy >> 1)<0)
       mvy= 0;
    if(x + (mvx >> 1)<0)
       mvx= 0;

#if 0
  int w= (s->width+15)&~15;
  int h= (s->height+15)&~15;
  if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
      av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
#endif
    src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
    dst = current;

    s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);

    /* select next block */
    if (i & 1) {
      /* from a right-hand sub-block: 8 rows down, back to the left column */
      current  += 8*(pitch - 1);
    } else {
      current  += 8;
    }
  }

  return 0;
}
 528
 529 static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 530                         uint8_t *current, uint8_t *previous, int pitch,
 531                         svq1_pmv_t *motion, int x, int y) {
 532   uint32_t block_type;
 533   int      result = 0;
 534
 535   /* get block type */
 536   block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
 537
 538   /* reset motion vectors */
 539   if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
 540     motion[0].x           =
 541     motion[0].y           =
 542     motion[(x / 8) + 2].x =
 543     motion[(x / 8) + 2].y =
 544     motion[(x / 8) + 3].x =
 545     motion[(x / 8) + 3].y = 0;
 546   }
 547
 548   switch (block_type) {
 549   case SVQ1_BLOCK_SKIP:
 550     svq1_skip_block (current, previous, pitch, x, y);
 551     break;
 552
 553   case SVQ1_BLOCK_INTER:
 554     result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
 555
 556     if (result != 0)
 557     {
 558 #ifdef DEBUG_SVQ1
 559     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
 560 #endif
 561       break;
 562     }
 563     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 564     break;
 565
 566   case SVQ1_BLOCK_INTER_4V:
 567     result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
 568
 569     if (result != 0)
 570     {
 571 #ifdef DEBUG_SVQ1
 572     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
 573 #endif
 574       break;
 575     }
 576     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 577     break;
 578
 579   case SVQ1_BLOCK_INTRA:
 580     result = svq1_decode_block_intra (bitbuf, current, pitch);
 581     break;
 582   }
 583
 584   return result;
 585 }
 586
/* Standard video sizes, indexed by the 3-bit frame size code of the frame
 * header. Code 7 has no fixed size: the header then carries explicit
 * 12-bit width and height fields, so entry 7 is only a placeholder. */
static struct { int width; int height; } svq1_frame_size_table[8] = {
  { 160, 120 }, { 128,  96 }, { 176, 144 }, { 352, 288 },
  { 704, 576 }, { 240, 180 }, { 320, 240 }, {  -1,  -1 }
};
 592
 593 static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) {
 594   int i;
 595
 596   for (i=0; i < length; i++) {
 597     value = checksum_table[data[i] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 598   }
 599
 600   return value;
 601 }
 602
#if 0 /* unused, remove? */
/* Disabled: checksum over a width x height area of a plane, using the same
 * table as svq1_packet_checksum().
 * NOTE(review): pixels is a uint16_t pointer, so pixels[x] ^ (value >> 8)
 * can exceed the 256-entry checksum_table - check before re-enabling. */
static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch,
                                         int width, int height, int value) {
  int x, y;

  for (y=0; y < height; y++) {
    for (x=0; x < width; x++) {
      value = checksum_table[pixels[x] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
    }

    pixels += pitch;
  }

  return value;
}
#endif
 619
 620 static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
 621   uint8_t seed;
 622   int     i;
 623
 624   out[0] = get_bits (bitbuf, 8);
 625
 626   seed = string_table[out[0]];
 627
 628   for (i=1; i <= out[0]; i++) {
 629     out[i] = get_bits (bitbuf, 8) ^ seed;
 630     seed   = string_table[out[i] ^ seed];
 631   }
 632 }
 633
 634 static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
 635   int frame_size_code;
 636   int temporal_reference;
 637
 638   temporal_reference = get_bits (bitbuf, 8);
 639
 640   /* frame type */
 641   s->pict_type= get_bits (bitbuf, 2)+1;
 642   if(s->pict_type==4)
 643       return -1;
 644
 645   if (s->pict_type == I_TYPE) {
 646
 647     /* unknown fields */
 648     if (s->f_code == 0x50 || s->f_code == 0x60) {
 649       int csum = get_bits (bitbuf, 16);
 650
 651       csum = svq1_packet_checksum ((uint8_t *)bitbuf->buffer, bitbuf->size_in_bits>>3, csum);
 652
 653 //      av_log(s->avctx, AV_LOG_INFO, "%s checksum (%02x) for packet data\n",
 654 //              (csum == 0) ? "correct" : "incorrect", csum);
 655     }
 656
 657     if ((s->f_code ^ 0x10) >= 0x50) {
 658       char msg[256];
 659
 660       svq1_parse_string (bitbuf, (char *) msg);
 661
 662       av_log(s->avctx, AV_LOG_INFO, "embedded message: \"%s\"\n", (char *) msg);
 663     }
 664
 665     skip_bits (bitbuf, 2);
 666     skip_bits (bitbuf, 2);
 667     skip_bits1 (bitbuf);
 668
 669     /* load frame size */
 670     frame_size_code = get_bits (bitbuf, 3);
 671
 672     if (frame_size_code == 7) {
 673       /* load width, height (12 bits each) */
 674       s->width = get_bits (bitbuf, 12);
 675       s->height = get_bits (bitbuf, 12);
 676
 677       if (!s->width || !s->height)
 678         return -1;
 679     } else {
 680       /* get width, height from table */
 681       s->width = svq1_frame_size_table[frame_size_code].width;
 682       s->height = svq1_frame_size_table[frame_size_code].height;
 683     }
 684   }
 685
 686   /* unknown fields */
 687   if (get_bits (bitbuf, 1) == 1) {
 688     skip_bits1 (bitbuf);       /* use packet checksum if (1) */
 689     skip_bits1 (bitbuf);       /* component checksums after image data if (1) */
 690
 691     if (get_bits (bitbuf, 2) != 0)
 692       return -1;
 693   }
 694
 695   if (get_bits (bitbuf, 1) == 1) {
 696     skip_bits1 (bitbuf);
 697     skip_bits (bitbuf, 4);
 698     skip_bits1 (bitbuf);
 699     skip_bits (bitbuf, 2);
 700
 701     while (get_bits (bitbuf, 1) == 1) {
 702       skip_bits (bitbuf, 8);
 703     }
 704   }
 705
 706   return 0;
 707 }
 708
 709 static int svq1_decode_frame(AVCodecContext *avctx,
 710                              void *data, int *data_size,
 711                              uint8_t *buf, int buf_size)
 712 {
 713   MpegEncContext *s=avctx->priv_data;
 714   uint8_t      *current, *previous;
 715   int           result, i, x, y, width, height;
 716   AVFrame *pict = data;
 717
 718   /* initialize bit buffer */
 719   init_get_bits(&s->gb,buf,buf_size*8);
 720
 721   /* decode frame header */
 722   s->f_code = get_bits (&s->gb, 22);
 723
 724   if ((s->f_code & ~0x70) || !(s->f_code & 0x60))
 725     return -1;
 726
 727   /* swap some header bytes (why?) */
 728   if (s->f_code != 0x20) {
 729     uint32_t *src = (uint32_t *) (buf + 4);
 730
 731     for (i=0; i < 4; i++) {
 732       src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
 733     }
 734   }
 735
 736   result = svq1_decode_frame_header (&s->gb, s);
 737
 738   if (result != 0)
 739   {
 740 #ifdef DEBUG_SVQ1
 741     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
 742 #endif
 743     return result;
 744   }
 745
 746   //FIXME this avoids some confusion for "B frames" without 2 references
 747   //this should be removed after libavcodec can handle more flexible picture types & ordering
 748   if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
 749
 750   if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
 751   if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
 752      ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
 753      || avctx->skip_frame >= AVDISCARD_ALL)
 754       return buf_size;
 755
 756   if(MPV_frame_start(s, avctx) < 0)
 757       return -1;
 758
 759   /* decode y, u and v components */
 760   for (i=0; i < 3; i++) {
 761     int linesize;
 762     if (i == 0) {
 763       width  = (s->width+15)&~15;
 764       height = (s->height+15)&~15;
 765       linesize= s->linesize;
 766     } else {
 767       if(s->flags&CODEC_FLAG_GRAY) break;
 768       width  = (s->width/4+15)&~15;
 769       height = (s->height/4+15)&~15;
 770       linesize= s->uvlinesize;
 771     }
 772
 773     current  = s->current_picture.data[i];
 774
 775     if(s->pict_type==B_TYPE){
 776         previous = s->next_picture.data[i];
 777     }else{
 778         previous = s->last_picture.data[i];
 779     }
 780
 781     if (s->pict_type == I_TYPE) {
 782       /* keyframe */
 783       for (y=0; y < height; y+=16) {
 784         for (x=0; x < width; x+=16) {
 785           result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
 786           if (result != 0)
 787           {
 788 //#ifdef DEBUG_SVQ1
 789             av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
 790 //#endif
 791             return result;
 792           }
 793         }
 794         current += 16*linesize;
 795       }
 796     } else {
 797       svq1_pmv_t pmv[width/8+3];
 798       /* delta frame */
 799       memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
 800
 801       for (y=0; y < height; y+=16) {
 802         for (x=0; x < width; x+=16) {
 803           result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
 804                                             linesize, pmv, x, y);
 805           if (result != 0)
 806           {
 807 #ifdef DEBUG_SVQ1
 808     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
 809 #endif
 810             return result;
 811           }
 812         }
 813
 814         pmv[0].x =
 815         pmv[0].y = 0;
 816
 817         current += 16*linesize;
 818       }
 819     }
 820   }
 821
 822   *pict = *(AVFrame*)&s->current_picture;
 823
 824
 825   MPV_frame_end(s);
 826
 827   *data_size=sizeof(AVFrame);
 828   return buf_size;
 829 }
 830
 831 static int svq1_decode_init(AVCodecContext *avctx)
 832 {
 833     MpegEncContext *s = avctx->priv_data;
 834     int i;
 835
 836     MPV_decode_defaults(s);
 837
 838     s->avctx = avctx;
 839     s->width = (avctx->width+3)&~3;
 840     s->height = (avctx->height+3)&~3;
 841     s->codec_id= avctx->codec->id;
 842     avctx->pix_fmt = PIX_FMT_YUV410P;
 843     avctx->has_b_frames= 1; // not true, but DP frames and these behave like unidirectional b frames
 844     s->flags= avctx->flags;
 845     if (MPV_common_init(s) < 0) return -1;
 846
 847     init_vlc(&svq1_block_type, 2, 4,
 848         &svq1_block_type_vlc[0][1], 2, 1,
 849         &svq1_block_type_vlc[0][0], 2, 1, 1);
 850
 851     init_vlc(&svq1_motion_component, 7, 33,
 852         &mvtab[0][1], 2, 1,
 853         &mvtab[0][0], 2, 1, 1);
 854
 855     for (i = 0; i < 6; i++) {
 856         init_vlc(&svq1_intra_multistage[i], 3, 8,
 857             &svq1_intra_multistage_vlc[i][0][1], 2, 1,
 858             &svq1_intra_multistage_vlc[i][0][0], 2, 1, 1);
 859         init_vlc(&svq1_inter_multistage[i], 3, 8,
 860             &svq1_inter_multistage_vlc[i][0][1], 2, 1,
 861             &svq1_inter_multistage_vlc[i][0][0], 2, 1, 1);
 862     }
 863
 864     init_vlc(&svq1_intra_mean, 8, 256,
 865         &svq1_intra_mean_vlc[0][1], 4, 2,
 866         &svq1_intra_mean_vlc[0][0], 4, 2, 1);
 867
 868     init_vlc(&svq1_inter_mean, 9, 512,
 869         &svq1_inter_mean_vlc[0][1], 4, 2,
 870         &svq1_inter_mean_vlc[0][0], 4, 2, 1);
 871
 872     return 0;
 873 }
 874
 875 static int svq1_decode_end(AVCodecContext *avctx)
 876 {
 877     MpegEncContext *s = avctx->priv_data;
 878
 879     MPV_common_end(s);
 880     return 0;
 881 }
 882
 883 static void svq1_write_header(SVQ1Context *s, int frame_type)
 884 {
 885     int i;
 886
 887     /* frame code */
 888     put_bits(&s->pb, 22, 0x20);
 889
 890     /* temporal reference (sure hope this is a "don't care") */
 891     put_bits(&s->pb, 8, 0x00);
 892
 893     /* frame type */
 894     put_bits(&s->pb, 2, frame_type - 1);
 895
 896     if (frame_type == I_TYPE) {
 897
 898         /* no checksum since frame code is 0x20 */
 899
 900         /* no embedded string either */
 901
 902         /* output 5 unknown bits (2 + 2 + 1) */
 903         put_bits(&s->pb, 5, 0);
 904
 905         for (i = 0; i < 7; i++)
 906         {
 907             if ((svq1_frame_size_table[i].width == s->frame_width) &&
 908                 (svq1_frame_size_table[i].height == s->frame_height))
 909             {
 910                 put_bits(&s->pb, 3, i);
 911                 break;
 912             }
 913         }
 914
 915         if (i == 7)
 916         {
 917             put_bits(&s->pb, 3, 7);
 918             put_bits(&s->pb, 12, s->frame_width);
 919             put_bits(&s->pb, 12, s->frame_height);
 920         }
 921     }
 922
 923     /* no checksum or extra data (next 2 bits get 0) */
 924     put_bits(&s->pb, 2, 0);
 925 }
 926
 927
/* Encoder tuning constants; they are not referenced in the part of the
 * file above this point. */
#define QUALITY_THRESHOLD 100
#define THRESHOLD_MULTIPLIER 0.6

/* encode_block() below declares a local variable named "vector".
 * NOTE(review): presumably the AltiVec headers define "vector" as a
 * macro, which is why it is undefined here - confirm. */
#if defined(HAVE_ALTIVEC)
#undef vector
#endif
 934
 935 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
 936     int count, y, x, i, j, split, best_mean, best_score, best_count;
 937     int best_vector[6];
 938     int block_sum[7]= {0, 0, 0, 0, 0, 0};
 939     int w= 2<<((level+2)>>1);
 940     int h= 2<<((level+1)>>1);
 941     int size=w*h;
 942     int16_t block[7][256];
 943     const int8_t *codebook_sum, *codebook;
 944     const uint16_t (*mean_vlc)[2];
 945     const uint8_t (*multistage_vlc)[2];
 946
 947     best_score=0;
 948     //FIXME optimize, this doenst need to be done multiple times
 949     if(intra){
 950         codebook_sum= svq1_intra_codebook_sum[level];
 951         codebook= svq1_intra_codebooks[level];
 952         mean_vlc= svq1_intra_mean_vlc;
 953         multistage_vlc= svq1_intra_multistage_vlc[level];
 954         for(y=0; y<h; y++){
 955             for(x=0; x<w; x++){
 956                 int v= src[x + y*stride];
 957                 block[0][x + w*y]= v;
 958                 best_score += v*v;
 959                 block_sum[0] += v;
 960             }
 961         }
 962     }else{
 963         codebook_sum= svq1_inter_codebook_sum[level];
 964         codebook= svq1_inter_codebooks[level];
 965         mean_vlc= svq1_inter_mean_vlc + 256;
 966         multistage_vlc= svq1_inter_multistage_vlc[level];
 967         for(y=0; y<h; y++){
 968             for(x=0; x<w; x++){
 969                 int v= src[x + y*stride] - ref[x + y*stride];
 970                 block[0][x + w*y]= v;
 971                 best_score += v*v;
 972                 block_sum[0] += v;
 973             }
 974         }
 975     }
 976
 977     best_count=0;
 978     best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
 979     best_mean= (block_sum[0] + (size>>1)) >> (level+3);
 980
 981     if(level<4){
 982         for(count=1; count<7; count++){
 983             int best_vector_score= INT_MAX;
 984             int best_vector_sum=-999, best_vector_mean=-999;
 985             const int stage= count-1;
 986             const int8_t *vector;
 987
 988             for(i=0; i<16; i++){
 989                 int sum= codebook_sum[stage*16 + i];
 990                 int sqr=0;
 991                 int diff, mean, score;
 992
 993                 vector = codebook + stage*size*16 + i*size;
 994
 995                 for(j=0; j<size; j++){
 996                     int v= vector[j];
 997                     sqr += (v - block[stage][j])*(v - block[stage][j]);
 998                 }
 999                 diff= block_sum[stage] - sum;
1000                 mean= (diff + (size>>1)) >> (level+3);
1001                 assert(mean >-300 && mean<300);
1002                 if(intra) mean= clip(mean, 0, 255);
1003                 else      mean= clip(mean, -256, 255);
1004                 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
1005                 if(score < best_vector_score){
1006                     best_vector_score= score;
1007                     best_vector[stage]= i;
1008                     best_vector_sum= sum;
1009                     best_vector_mean= mean;
1010                 }
1011             }
1012             assert(best_vector_mean != -999);
1013             vector= codebook + stage*size*16 + best_vector[stage]*size;
1014             for(j=0; j<size; j++){
1015                 block[stage+1][j] = block[stage][j] - vector[j];
1016             }
1017             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
1018             best_vector_score +=
1019                 lambda*(+ 1 + 4*count
1020                         + multistage_vlc[1+count][1]
1021                         + mean_vlc[best_vector_mean][1]);
1022
1023             if(best_vector_score < best_score){
1024                 best_score= best_vector_score;
1025                 best_count= count;
1026                 best_mean= best_vector_mean;
1027             }
1028         }
1029     }
1030
1031     split=0;
1032     if(best_score > threshold && level){
1033         int score=0;
1034         int offset= (level&1) ? stride*h/2 : w/2;
1035         PutBitContext backup[6];
1036
1037         for(i=level-1; i>=0; i--){
1038             backup[i]= s->reorder_pb[i];
1039         }
1040         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
1041         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
1042         score += lambda;
1043
1044         if(score < best_score){
1045             best_score= score;
1046             split=1;
1047         }else{
1048             for(i=level-1; i>=0; i--){
1049                 s->reorder_pb[i]= backup[i];
1050             }
1051         }
1052     }
1053     if (level > 0)
1054         put_bits(&s->reorder_pb[level], 1, split);
1055
1056     if(!split){
1057         assert((best_mean >= 0 && best_mean<256) || !intra);
1058         assert(best_mean >= -256 && best_mean<256);
1059         assert(best_count >=0 && best_count<7);
1060         assert(level<4 || best_count==0);
1061
1062         /* output the encoding */
1063         put_bits(&s->reorder_pb[level],
1064             multistage_vlc[1 + best_count][1],
1065             multistage_vlc[1 + best_count][0]);
1066         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
1067             mean_vlc[best_mean][0]);
1068
1069         for (i = 0; i < best_count; i++){
1070             assert(best_vector[i]>=0 && best_vector[i]<16);
1071             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
1072         }
1073
1074         for(y=0; y<h; y++){
1075             for(x=0; x<w; x++){
1076                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
1077             }
1078         }
1079     }
1080
1081     return best_score;
1082 }
1083
1084 #ifdef CONFIG_ENCODERS
1085
1086 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
1087     int width, int height, int src_stride, int stride)
1088 {
1089     int x, y;
1090     int i;
1091     int block_width, block_height;
1092     int level;
1093     int threshold[6];
1094     const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
1095
1096     /* figure out the acceptable level thresholds in advance */
1097     threshold[5] = QUALITY_THRESHOLD;
1098     for (level = 4; level >= 0; level--)
1099         threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
1100
1101     block_width = (width + 15) / 16;
1102     block_height = (height + 15) / 16;
1103
1104     if(s->picture.pict_type == P_TYPE){
1105         s->m.avctx= s->avctx;
1106         s->m.current_picture_ptr= &s->m.current_picture;
1107         s->m.last_picture_ptr   = &s->m.last_picture;
1108         s->m.last_picture.data[0]= ref_plane;
1109         s->m.linesize=
1110         s->m.last_picture.linesize[0]=
1111         s->m.new_picture.linesize[0]=
1112         s->m.current_picture.linesize[0]= stride;
1113         s->m.width= width;
1114         s->m.height= height;
1115         s->m.mb_width= block_width;
1116         s->m.mb_height= block_height;
1117         s->m.mb_stride= s->m.mb_width+1;
1118         s->m.b8_stride= 2*s->m.mb_width+1;
1119         s->m.f_code=1;
1120         s->m.pict_type= s->picture.pict_type;
1121         s->m.qscale= s->picture.quality/FF_QP2LAMBDA;
1122         s->m.me_method= s->avctx->me_method;
1123
1124         if(!s->motion_val8[plane]){
1125             s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
1126             s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
1127         }
1128
1129         s->m.mb_type= s->mb_type;
1130
1131         //dummies, to avoid segfaults
1132         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
1133         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
1134         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
1135         s->m.current_picture.mb_type= s->dummy;
1136
1137         s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
1138         s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
1139         s->m.dsp= s->dsp; //move
1140         ff_init_me(&s->m);
1141
1142         s->m.me.dia_size= s->avctx->dia_size;
1143         s->m.first_slice_line=1;
1144         for (y = 0; y < block_height; y++) {
1145             uint8_t src[stride*16];
1146
1147             s->m.new_picture.data[0]= src - y*16*stride; //ugly
1148             s->m.mb_y= y;
1149
1150             for(i=0; i<16 && i + 16*y<height; i++){
1151                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1152                 for(x=width; x<16*block_width; x++)
1153                     src[i*stride+x]= src[i*stride+x-1];
1154             }
1155             for(; i<16 && i + 16*y<16*block_height; i++)
1156                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1157
1158             for (x = 0; x < block_width; x++) {
1159                 s->m.mb_x= x;
1160                 ff_init_block_index(&s->m);
1161                 ff_update_block_index(&s->m);
1162
1163                 ff_estimate_p_frame_motion(&s->m, x, y);
1164             }
1165             s->m.first_slice_line=0;
1166         }
1167
1168         ff_fix_long_p_mvs(&s->m);
1169         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
1170     }
1171
1172     s->m.first_slice_line=1;
1173     for (y = 0; y < block_height; y++) {
1174         uint8_t src[stride*16];
1175
1176         for(i=0; i<16 && i + 16*y<height; i++){
1177             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1178             for(x=width; x<16*block_width; x++)
1179                 src[i*stride+x]= src[i*stride+x-1];
1180         }
1181         for(; i<16 && i + 16*y<16*block_height; i++)
1182             memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1183
1184         s->m.mb_y= y;
1185         for (x = 0; x < block_width; x++) {
1186             uint8_t reorder_buffer[3][6][7*32];
1187             int count[3][6];
1188             int offset = y * 16 * stride + x * 16;
1189             uint8_t *decoded= decoded_plane + offset;
1190             uint8_t *ref= ref_plane + offset;
1191             int score[4]={0,0,0,0}, best;
1192             uint8_t temp[16*stride];
1193
1194             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
1195                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1196                 return -1;
1197             }
1198
1199             s->m.mb_x= x;
1200             ff_init_block_index(&s->m);
1201             ff_update_block_index(&s->m);
1202
1203             if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
1204                 for(i=0; i<6; i++){
1205                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
1206                 }
1207                 if(s->picture.pict_type == P_TYPE){
1208                     const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
1209                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1210                     score[0]= vlc[1]*lambda;
1211                 }
1212                 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
1213                 for(i=0; i<6; i++){
1214                     count[0][i]= put_bits_count(&s->reorder_pb[i]);
1215                     flush_put_bits(&s->reorder_pb[i]);
1216                 }
1217             }else
1218                 score[0]= INT_MAX;
1219
1220             best=0;
1221
1222             if(s->picture.pict_type == P_TYPE){
1223                 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
1224                 int mx, my, pred_x, pred_y, dxy;
1225                 int16_t *motion_ptr;
1226
1227                 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
1228                 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
1229                     for(i=0; i<6; i++)
1230                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
1231
1232                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1233
1234                     s->m.pb= s->reorder_pb[5];
1235                     mx= motion_ptr[0];
1236                     my= motion_ptr[1];
1237                     assert(mx>=-32 && mx<=31);
1238                     assert(my>=-32 && my<=31);
1239                     assert(pred_x>=-32 && pred_x<=31);
1240                     assert(pred_y>=-32 && pred_y<=31);
1241                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
1242                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
1243                     s->reorder_pb[5]= s->m.pb;
1244                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
1245
1246                     dxy= (mx&1) + 2*(my&1);
1247
1248                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
1249
1250                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
1251                     best= score[1] <= score[0];
1252
1253                     vlc= svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
1254                     score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
1255                     score[2]+= vlc[1]*lambda;
1256                     if(score[2] < score[best] && mx==0 && my==0){
1257                         best=2;
1258                         s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
1259                         for(i=0; i<6; i++){
1260                             count[2][i]=0;
1261                         }
1262                         put_bits(&s->pb, vlc[1], vlc[0]);
1263                     }
1264                 }
1265
1266                 if(best==1){
1267                     for(i=0; i<6; i++){
1268                         count[1][i]= put_bits_count(&s->reorder_pb[i]);
1269                         flush_put_bits(&s->reorder_pb[i]);
1270                     }
1271                 }else{
1272                     motion_ptr[0                 ] = motion_ptr[1                 ]=
1273                     motion_ptr[2                 ] = motion_ptr[3                 ]=
1274                     motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
1275                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
1276                 }
1277             }
1278
1279             s->rd_total += score[best];
1280
1281             for(i=5; i>=0; i--){
1282                 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
1283             }
1284             if(best==0){
1285                 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
1286             }
1287         }
1288         s->m.first_slice_line=0;
1289     }
1290     return 0;
1291 }
1292
1293 static int svq1_encode_init(AVCodecContext *avctx)
1294 {
1295     SVQ1Context * const s = avctx->priv_data;
1296
1297     dsputil_init(&s->dsp, avctx);
1298     avctx->coded_frame= (AVFrame*)&s->picture;
1299
1300     s->frame_width = avctx->width;
1301     s->frame_height = avctx->height;
1302
1303     s->y_block_width = (s->frame_width + 15) / 16;
1304     s->y_block_height = (s->frame_height + 15) / 16;
1305
1306     s->c_block_width = (s->frame_width / 4 + 15) / 16;
1307     s->c_block_height = (s->frame_height / 4 + 15) / 16;
1308
1309     s->avctx= avctx;
1310     s->m.avctx= avctx;
1311     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1312     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1313     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1314     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
1315     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
1316     h263_encode_init(&s->m); //mv_penalty
1317
1318     return 0;
1319 }
1320
1321 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
1322     int buf_size, void *data)
1323 {
1324     SVQ1Context * const s = avctx->priv_data;
1325     AVFrame *pict = data;
1326     AVFrame * const p= (AVFrame*)&s->picture;
1327     AVFrame temp;
1328     int i;
1329
1330     if(avctx->pix_fmt != PIX_FMT_YUV410P){
1331         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
1332         return -1;
1333     }
1334
1335     if(!s->current_picture.data[0]){
1336         avctx->get_buffer(avctx, &s->current_picture);
1337         avctx->get_buffer(avctx, &s->last_picture);
1338     }
1339
1340     temp= s->current_picture;
1341     s->current_picture= s->last_picture;
1342     s->last_picture= temp;
1343
1344     init_put_bits(&s->pb, buf, buf_size);
1345
1346     *p = *pict;
1347     p->pict_type = avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
1348     p->key_frame = p->pict_type == I_TYPE;
1349
1350     svq1_write_header(s, p->pict_type);
1351     for(i=0; i<3; i++){
1352         if(svq1_encode_plane(s, i,
1353             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
1354             s->frame_width / (i?4:1), s->frame_height / (i?4:1),
1355             s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
1356                 return -1;
1357     }
1358
1359 //    align_put_bits(&s->pb);
1360     while(put_bits_count(&s->pb) & 31)
1361         put_bits(&s->pb, 1, 0);
1362
1363     flush_put_bits(&s->pb);
1364
1365     return (put_bits_count(&s->pb) / 8);
1366 }
1367
1368 static int svq1_encode_end(AVCodecContext *avctx)
1369 {
1370     SVQ1Context * const s = avctx->priv_data;
1371     int i;
1372
1373     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
1374
1375     av_freep(&s->m.me.scratchpad);
1376     av_freep(&s->m.me.map);
1377     av_freep(&s->m.me.score_map);
1378     av_freep(&s->mb_type);
1379     av_freep(&s->dummy);
1380
1381     for(i=0; i<3; i++){
1382         av_freep(&s->motion_val8[i]);
1383         av_freep(&s->motion_val16[i]);
1384     }
1385
1386     return 0;
1387 }
1388
1389 #endif //CONFIG_ENCODERS
1390
/**
 * Codec descriptor for the SVQ1 decoder.
 *
 * NOTE(review): the leading fields are initialised positionally; they are
 * presumably name, type, id, private context size, init, encode, close,
 * decode and capabilities -- confirm against the AVCodec declaration in
 * avcodec.h. The only slot left NULL sits between svq1_decode_init and
 * svq1_decode_end.
 */
AVCodec svq1_decoder = {
    "svq1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_SVQ1,
    sizeof(MpegEncContext),
    svq1_decode_init,
    NULL,
    svq1_decode_end,
    svq1_decode_frame,
    CODEC_CAP_DR1,
    .flush= ff_mpeg_flush,
    /* list of pixel formats, terminated by -1 */
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
};
1404
1405 #ifdef CONFIG_ENCODERS
1406
/**
 * Codec descriptor for the SVQ1 encoder.
 *
 * NOTE(review): the leading fields are initialised positionally; they are
 * presumably name, type, id, private context size, init, encode and close
 * -- confirm against the AVCodec declaration in avcodec.h.
 */
AVCodec svq1_encoder = {
    "svq1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_SVQ1,
    sizeof(SVQ1Context),
    svq1_encode_init,
    svq1_encode_frame,
    svq1_encode_end,
    /* list of pixel formats, terminated by -1; svq1_encode_frame() rejects
     * any format other than PIX_FMT_YUV410P */
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
};
1417
1418 #endif //CONFIG_ENCODERS