libavcodec/snow.c

   1 /*
   2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "avcodec.h"
  22 #include "dsputil.h"
  23 #include "snow.h"
  24
  25 #include "rangecoder.h"
  26
  27 #include "mpegvideo.h"
  28
  29 #undef NDEBUG
  30 #include <assert.h>
  31
  32 static const int8_t quant3[256]={
  33  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  34  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  35  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  37  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  39  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  40  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
  49 };
  50 static const int8_t quant3b[256]={
  51  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  52  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  53  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  54  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  55  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  56  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  57  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  58  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  67 };
  68 static const int8_t quant3bA[256]={
  69  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  70  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  71  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  72  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  73  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  74  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  75  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  76  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  77  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  78  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  79  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  80  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  81  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  82  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  83  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  84  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  85 };
  86 static const int8_t quant5[256]={
  87  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  88  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  89  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  90  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  91  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  92  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  93  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  94  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
 103 };
 104 static const int8_t quant7[256]={
 105  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 106  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 107  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 108  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 109  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 110  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 111  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 112  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
 119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
 121 };
 122 static const int8_t quant9[256]={
 123  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 124  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 125  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 126  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 127  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 128  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 129  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 130  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
 138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
 139 };
 140 static const int8_t quant11[256]={
 141  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 142  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 143  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 144  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 145  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 146  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 147  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 148  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
 155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
 157 };
 158 static const int8_t quant13[256]={
 159  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 160  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 161  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 162  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 163  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 164  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 165  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 166  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
 172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
 175 };
 176
 177 #if 0 //64*cubic
 178 static const uint8_t obmc32[1024]={
 179   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 180   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 181   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 182   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 183   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 184   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 185   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 186   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 187   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 188   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 189   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 190   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 191   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 192   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 193   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 194   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 195   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 196   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 197   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 198   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 199   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 200   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 201   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 202   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 203   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 204   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 205   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 206   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 207   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 208   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 209   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 210   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 211 //error:0.000022
 212 };
 213 static const uint8_t obmc16[256]={
 214   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 215   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 216   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 217   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 218   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 219   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 220   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 221   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 222   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 223   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 224   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 225   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 226   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 227   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 228   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 229   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 230 //error:0.000033
 231 };
 232 #elif 1 // 64*linear
 233 static const uint8_t obmc32[1024]={
 234   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 235   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 236   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 237   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 238   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 239   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 240   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 241   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 242   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 243   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 244   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 245   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 246   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 247   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 248   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 249   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 250   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 251   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 252   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 253   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 254   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 255   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 256   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 257   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 258   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 259   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 260   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 261   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 262   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 263   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 264   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 265   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 266  //error:0.000020
 267 };
 268 static const uint8_t obmc16[256]={
 269   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 270   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 271   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 272   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 273   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 274  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 275  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 276  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 277  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 278  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 279  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 280   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 281   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 282   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 283   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 284   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 285 //error:0.000015
 286 };
 287 #else //64*cos
 288 static const uint8_t obmc32[1024]={
 289   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 290   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 291   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 292   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 293   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 294   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 295   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 296   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 297   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 298   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 299   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 300   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 301   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 302   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 303   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 304   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 305   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 306   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 307   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 308   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 309   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 310   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 311   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 312   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 313   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 314   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 315   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 316   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 317   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 318   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 319   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 320   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 321 //error:0.000022
 322 };
 323 static const uint8_t obmc16[256]={
 324   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 325   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 326   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 327   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 328   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 329   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 330   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 331   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 332   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 333   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 334   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 335   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 336   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 337   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 338   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 339   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 340 //error:0.000022
 341 };
 342 #endif /* 0 */
 343
 344 //linear *64
 345 static const uint8_t obmc8[64]={
 346   4, 12, 20, 28, 28, 20, 12,  4,
 347  12, 36, 60, 84, 84, 60, 36, 12,
 348  20, 60,100,140,140,100, 60, 20,
 349  28, 84,140,196,196,140, 84, 28,
 350  28, 84,140,196,196,140, 84, 28,
 351  20, 60,100,140,140,100, 60, 20,
 352  12, 36, 60, 84, 84, 60, 36, 12,
 353   4, 12, 20, 28, 28, 20, 12,  4,
 354 //error:0.000000
 355 };
 356
 357 //linear *64
 358 static const uint8_t obmc4[16]={
 359  16, 48, 48, 16,
 360  48,144,144, 48,
 361  48,144,144, 48,
 362  16, 48, 48, 16,
 363 //error:0.000000
 364 };
 365
 366 static const uint8_t *obmc_tab[4]={
 367     obmc32, obmc16, obmc8, obmc4
 368 };
 369
 370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
 371
 372 typedef struct BlockNode{
 373     int16_t mx;
 374     int16_t my;
 375     uint8_t ref;
 376     uint8_t color[3];
 377     uint8_t type;
 378 //#define TYPE_SPLIT    1
 379 #define BLOCK_INTRA   1
 380 #define BLOCK_OPT     2
 381 //#define TYPE_NOCOLOR  4
 382     uint8_t level; //FIXME merge into type?
 383 }BlockNode;
 384
 385 static const BlockNode null_block= { //FIXME add border maybe
 386     .color= {128,128,128},
 387     .mx= 0,
 388     .my= 0,
 389     .ref= 0,
 390     .type= 0,
 391     .level= 0,
 392 };
 393
 394 #define LOG2_MB_SIZE 4
 395 #define MB_SIZE (1<<LOG2_MB_SIZE)
 396 #define ENCODER_EXTRA_BITS 4
 397 #define HTAPS_MAX 8
 398
 399 typedef struct x_and_coeff{
 400     int16_t x;
 401     uint16_t coeff;
 402 } x_and_coeff;
 403
 404 typedef struct SubBand{
 405     int level;
 406     int stride;
 407     int width;
 408     int height;
 409     int qlog;        ///< log(qscale)/log[2^(1/6)]
 410     DWTELEM *buf;
 411     IDWTELEM *ibuf;
 412     int buf_x_offset;
 413     int buf_y_offset;
 414     int stride_line; ///< Stride measured in lines, not pixels.
 415     x_and_coeff * x_coeff;
 416     struct SubBand *parent;
 417     uint8_t state[/*7*2*/ 7 + 512][32];
 418 }SubBand;
 419
 420 typedef struct Plane{
 421     int width;
 422     int height;
 423     SubBand band[MAX_DECOMPOSITIONS][4];
 424
 425     int htaps;
 426     int8_t hcoeff[HTAPS_MAX/2];
 427     int diag_mc;
 428     int fast_mc;
 429
 430     int last_htaps;
 431     int8_t last_hcoeff[HTAPS_MAX/2];
 432     int last_diag_mc;
 433 }Plane;
 434
 435 typedef struct SnowContext{
 436 //    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 437
 438     AVCodecContext *avctx;
 439     RangeCoder c;
 440     DSPContext dsp;
 441     AVFrame new_picture;
 442     AVFrame input_picture;              ///< new_picture with the internal linesizes
 443     AVFrame current_picture;
 444     AVFrame last_picture[MAX_REF_FRAMES];
 445     uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
 446     AVFrame mconly_picture;
 447 //     uint8_t q_context[16];
 448     uint8_t header_state[32];
 449     uint8_t block_state[128 + 32*128];
 450     int keyframe;
 451     int always_reset;
 452     int version;
 453     int spatial_decomposition_type;
 454     int last_spatial_decomposition_type;
 455     int temporal_decomposition_type;
 456     int spatial_decomposition_count;
 457     int last_spatial_decomposition_count;
 458     int temporal_decomposition_count;
 459     int max_ref_frames;
 460     int ref_frames;
 461     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
 462     uint32_t *ref_scores[MAX_REF_FRAMES];
 463     DWTELEM *spatial_dwt_buffer;
 464     IDWTELEM *spatial_idwt_buffer;
 465     int colorspace_type;
 466     int chroma_h_shift;
 467     int chroma_v_shift;
 468     int spatial_scalability;
 469     int qlog;
 470     int last_qlog;
 471     int lambda;
 472     int lambda2;
 473     int pass1_rc;
 474     int mv_scale;
 475     int last_mv_scale;
 476     int qbias;
 477     int last_qbias;
 478 #define QBIAS_SHIFT 3
 479     int b_width;
 480     int b_height;
 481     int block_max_depth;
 482     int last_block_max_depth;
 483     Plane plane[MAX_PLANES];
 484     BlockNode *block;
 485 #define ME_CACHE_SIZE 1024
 486     int me_cache[ME_CACHE_SIZE];
 487     int me_cache_generation;
 488     slice_buffer sb;
 489
 490     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 491 }SnowContext;
 492
 493 typedef struct {
 494     IDWTELEM *b0;
 495     IDWTELEM *b1;
 496     IDWTELEM *b2;
 497     IDWTELEM *b3;
 498     int y;
 499 } dwt_compose_t;
 500
 501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
 502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
 503
 504 static void iterative_me(SnowContext *s);
 505
 506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
 507 {
 508     int i;
 509
 510     buf->base_buffer = base_buffer;
 511     buf->line_count = line_count;
 512     buf->line_width = line_width;
 513     buf->data_count = max_allocated_lines;
 514     buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
 515     buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
 516
 517     for(i = 0; i < max_allocated_lines; i++){
 518         buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
 519     }
 520
 521     buf->data_stack_top = max_allocated_lines - 1;
 522 }
 523
 524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
 525 {
 526     int offset;
 527     IDWTELEM * buffer;
 528
 529     assert(buf->data_stack_top >= 0);
 530 //  assert(!buf->line[line]);
 531     if (buf->line[line])
 532         return buf->line[line];
 533
 534     offset = buf->line_width * line;
 535     buffer = buf->data_stack[buf->data_stack_top];
 536     buf->data_stack_top--;
 537     buf->line[line] = buffer;
 538
 539     return buffer;
 540 }
 541
 542 static void slice_buffer_release(slice_buffer * buf, int line)
 543 {
 544     int offset;
 545     IDWTELEM * buffer;
 546
 547     assert(line >= 0 && line < buf->line_count);
 548     assert(buf->line[line]);
 549
 550     offset = buf->line_width * line;
 551     buffer = buf->line[line];
 552     buf->data_stack_top++;
 553     buf->data_stack[buf->data_stack_top] = buffer;
 554     buf->line[line] = NULL;
 555 }
 556
 557 static void slice_buffer_flush(slice_buffer * buf)
 558 {
 559     int i;
 560     for(i = 0; i < buf->line_count; i++){
 561         if (buf->line[i])
 562             slice_buffer_release(buf, i);
 563     }
 564 }
 565
 566 static void slice_buffer_destroy(slice_buffer * buf)
 567 {
 568     int i;
 569     slice_buffer_flush(buf);
 570
 571     for(i = buf->data_count - 1; i >= 0; i--){
 572         av_freep(&buf->data_stack[i]);
 573     }
 574     av_freep(&buf->data_stack);
 575     av_freep(&buf->line);
 576 }
 577
 578 #ifdef __sgi
 579 // Avoid a name clash on SGI IRIX
 580 #undef qexp
 581 #endif
 582 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
 583 static uint8_t qexp[QROOT];
 584
 585 static inline int mirror(int v, int m){
 586     while((unsigned)v > (unsigned)m){
 587         v=-v;
 588         if(v<0) v+= 2*m;
 589     }
 590     return v;
 591 }
 592
 593 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
 594     int i;
 595
 596     if(v){
 597         const int a= FFABS(v);
 598         const int e= av_log2(a);
 599 #if 1
 600         const int el= FFMIN(e, 10);
 601         put_rac(c, state+0, 0);
 602
 603         for(i=0; i<el; i++){
 604             put_rac(c, state+1+i, 1);  //1..10
 605         }
 606         for(; i<e; i++){
 607             put_rac(c, state+1+9, 1);  //1..10
 608         }
 609         put_rac(c, state+1+FFMIN(i,9), 0);
 610
 611         for(i=e-1; i>=el; i--){
 612             put_rac(c, state+22+9, (a>>i)&1); //22..31
 613         }
 614         for(; i>=0; i--){
 615             put_rac(c, state+22+i, (a>>i)&1); //22..31
 616         }
 617
 618         if(is_signed)
 619             put_rac(c, state+11 + el, v < 0); //11..21
 620 #else
 621
 622         put_rac(c, state+0, 0);
 623         if(e<=9){
 624             for(i=0; i<e; i++){
 625                 put_rac(c, state+1+i, 1);  //1..10
 626             }
 627             put_rac(c, state+1+i, 0);
 628
 629             for(i=e-1; i>=0; i--){
 630                 put_rac(c, state+22+i, (a>>i)&1); //22..31
 631             }
 632
 633             if(is_signed)
 634                 put_rac(c, state+11 + e, v < 0); //11..21
 635         }else{
 636             for(i=0; i<e; i++){
 637                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
 638             }
 639             put_rac(c, state+1+FFMIN(i,9), 0);
 640
 641             for(i=e-1; i>=0; i--){
 642                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
 643             }
 644
 645             if(is_signed)
 646                 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
 647         }
 648 #endif /* 1 */
 649     }else{
 650         put_rac(c, state+0, 1);
 651     }
 652 }
 653
 654 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
 655     if(get_rac(c, state+0))
 656         return 0;
 657     else{
 658         int i, e, a;
 659         e= 0;
 660         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
 661             e++;
 662         }
 663
 664         a= 1;
 665         for(i=e-1; i>=0; i--){
 666             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
 667         }
 668
 669         if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
 670             return -a;
 671         else
 672             return a;
 673     }
 674 }
 675
 676 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
 677     int i;
 678     int r= log2>=0 ? 1<<log2 : 1;
 679
 680     assert(v>=0);
 681     assert(log2>=-4);
 682
 683     while(v >= r){
 684         put_rac(c, state+4+log2, 1);
 685         v -= r;
 686         log2++;
 687         if(log2>0) r+=r;
 688     }
 689     put_rac(c, state+4+log2, 0);
 690
 691     for(i=log2-1; i>=0; i--){
 692         put_rac(c, state+31-i, (v>>i)&1);
 693     }
 694 }
 695
 696 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
 697     int i;
 698     int r= log2>=0 ? 1<<log2 : 1;
 699     int v=0;
 700
 701     assert(log2>=-4);
 702
 703     while(get_rac(c, state+4+log2)){
 704         v+= r;
 705         log2++;
 706         if(log2>0) r+=r;
 707     }
 708
 709     for(i=log2-1; i>=0; i--){
 710         v+= get_rac(c, state+31-i)<<i;
 711     }
 712
 713     return v;
 714 }
 715
 716 static av_always_inline void
 717 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 718      int dst_step, int src_step, int ref_step,
 719      int width, int mul, int add, int shift,
 720      int highpass, int inverse){
 721     const int mirror_left= !highpass;
 722     const int mirror_right= (width&1) ^ highpass;
 723     const int w= (width>>1) - 1 + (highpass & width);
 724     int i;
 725
 726 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 727     if(mirror_left){
 728         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 729         dst += dst_step;
 730         src += src_step;
 731     }
 732
 733     for(i=0; i<w; i++){
 734         dst[i*dst_step] =
 735             LIFT(src[i*src_step],
 736                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 737                  inverse);
 738     }
 739
 740     if(mirror_right){
 741         dst[w*dst_step] =
 742             LIFT(src[w*src_step],
 743                  ((mul*2*ref[w*ref_step]+add)>>shift),
 744                  inverse);
 745     }
 746 }
 747
 748 static av_always_inline void
 749 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 750          int dst_step, int src_step, int ref_step,
 751          int width, int mul, int add, int shift,
 752          int highpass, int inverse){
 753     const int mirror_left= !highpass;
 754     const int mirror_right= (width&1) ^ highpass;
 755     const int w= (width>>1) - 1 + (highpass & width);
 756     int i;
 757
 758 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 759     if(mirror_left){
 760         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 761         dst += dst_step;
 762         src += src_step;
 763     }
 764
 765     for(i=0; i<w; i++){
 766         dst[i*dst_step] =
 767             LIFT(src[i*src_step],
 768                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 769                  inverse);
 770     }
 771
 772     if(mirror_right){
 773         dst[w*dst_step] =
 774             LIFT(src[w*src_step],
 775                  ((mul*2*ref[w*ref_step]+add)>>shift),
 776                  inverse);
 777     }
 778 }
 779
 780 #ifndef liftS
 781 static av_always_inline void
 782 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 783       int dst_step, int src_step, int ref_step,
 784       int width, int mul, int add, int shift,
 785       int highpass, int inverse){
 786     const int mirror_left= !highpass;
 787     const int mirror_right= (width&1) ^ highpass;
 788     const int w= (width>>1) - 1 + (highpass & width);
 789     int i;
 790
 791     assert(shift == 4);
 792 #define LIFTS(src, ref, inv) \
 793         ((inv) ? \
 794             (src) + (((ref) + 4*(src))>>shift): \
 795             -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 796     if(mirror_left){
 797         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 798         dst += dst_step;
 799         src += src_step;
 800     }
 801
 802     for(i=0; i<w; i++){
 803         dst[i*dst_step] =
 804             LIFTS(src[i*src_step],
 805                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 806                   inverse);
 807     }
 808
 809     if(mirror_right){
 810         dst[w*dst_step] =
 811             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 812     }
 813 }
 814 static av_always_inline void
 815 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 816           int dst_step, int src_step, int ref_step,
 817           int width, int mul, int add, int shift,
 818           int highpass, int inverse){
 819     const int mirror_left= !highpass;
 820     const int mirror_right= (width&1) ^ highpass;
 821     const int w= (width>>1) - 1 + (highpass & width);
 822     int i;
 823
 824     assert(shift == 4);
 825 #define LIFTS(src, ref, inv) \
 826     ((inv) ? \
 827         (src) + (((ref) + 4*(src))>>shift): \
 828         -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 829     if(mirror_left){
 830         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 831         dst += dst_step;
 832         src += src_step;
 833     }
 834
 835     for(i=0; i<w; i++){
 836         dst[i*dst_step] =
 837             LIFTS(src[i*src_step],
 838                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 839                   inverse);
 840     }
 841
 842     if(mirror_right){
 843         dst[w*dst_step] =
 844             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 845     }
 846 }
 847 #endif /* ! liftS */
 848
 849 static void horizontal_decompose53i(DWTELEM *b, int width){
 850     DWTELEM temp[width];
 851     const int width2= width>>1;
 852     int x;
 853     const int w2= (width+1)>>1;
 854
 855     for(x=0; x<width2; x++){
 856         temp[x   ]= b[2*x    ];
 857         temp[x+w2]= b[2*x + 1];
 858     }
 859     if(width&1)
 860         temp[x   ]= b[2*x    ];
 861 #if 0
 862     {
 863     int A1,A2,A3,A4;
 864     A2= temp[1       ];
 865     A4= temp[0       ];
 866     A1= temp[0+width2];
 867     A1 -= (A2 + A4)>>1;
 868     A4 += (A1 + 1)>>1;
 869     b[0+width2] = A1;
 870     b[0       ] = A4;
 871     for(x=1; x+1<width2; x+=2){
 872         A3= temp[x+width2];
 873         A4= temp[x+1     ];
 874         A3 -= (A2 + A4)>>1;
 875         A2 += (A1 + A3 + 2)>>2;
 876         b[x+width2] = A3;
 877         b[x       ] = A2;
 878
 879         A1= temp[x+1+width2];
 880         A2= temp[x+2       ];
 881         A1 -= (A2 + A4)>>1;
 882         A4 += (A1 + A3 + 2)>>2;
 883         b[x+1+width2] = A1;
 884         b[x+1       ] = A4;
 885     }
 886     A3= temp[width-1];
 887     A3 -= A2;
 888     A2 += (A1 + A3 + 2)>>2;
 889     b[width -1] = A3;
 890     b[width2-1] = A2;
 891     }
 892 #else
 893     lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
 894     lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
 895 #endif /* 0 */
 896 }
 897
 898 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 899     int i;
 900
 901     for(i=0; i<width; i++){
 902         b1[i] -= (b0[i] + b2[i])>>1;
 903     }
 904 }
 905
 906 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 907     int i;
 908
 909     for(i=0; i<width; i++){
 910         b1[i] += (b0[i] + b2[i] + 2)>>2;
 911     }
 912 }
 913
 914 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
 915     int y;
 916     DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
 917     DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
 918
 919     for(y=-2; y<height; y+=2){
 920         DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
 921         DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
 922
 923         if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
 924         if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
 925
 926         if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
 927         if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
 928
 929         b0=b2;
 930         b1=b3;
 931     }
 932 }
 933
 934 static void horizontal_decompose97i(DWTELEM *b, int width){
 935     DWTELEM temp[width];
 936     const int w2= (width+1)>>1;
 937
 938     lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
 939     liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
 940     lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
 941     lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
 942 }
 943
 944
 945 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 946     int i;
 947
 948     for(i=0; i<width; i++){
 949         b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
 950     }
 951 }
 952
 953 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 954     int i;
 955
 956     for(i=0; i<width; i++){
 957         b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
 958     }
 959 }
 960
 961 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 962     int i;
 963
 964     for(i=0; i<width; i++){
 965 #ifdef liftS
 966         b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
 967 #else
 968         b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
 969 #endif
 970     }
 971 }
 972
 973 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 974     int i;
 975
 976     for(i=0; i<width; i++){
 977         b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
 978     }
 979 }
 980
 981 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
 982     int y;
 983     DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
 984     DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
 985     DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
 986     DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
 987
 988     for(y=-4; y<height; y+=2){
 989         DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
 990         DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
 991
 992         if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
 993         if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
 994
 995         if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
 996         if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
 997         if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
 998         if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
 999
1000         b0=b2;
1001         b1=b3;
1002         b2=b4;
1003         b3=b5;
1004     }
1005 }
1006
1007 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1008     int level;
1009
1010     for(level=0; level<decomposition_count; level++){
1011         switch(type){
1012         case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1013         case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1014         }
1015     }
1016 }
1017
1018 static void horizontal_compose53i(IDWTELEM *b, int width){
1019     IDWTELEM temp[width];
1020     const int width2= width>>1;
1021     const int w2= (width+1)>>1;
1022     int x;
1023
1024 #if 0
1025     int A1,A2,A3,A4;
1026     A2= temp[1       ];
1027     A4= temp[0       ];
1028     A1= temp[0+width2];
1029     A1 -= (A2 + A4)>>1;
1030     A4 += (A1 + 1)>>1;
1031     b[0+width2] = A1;
1032     b[0       ] = A4;
1033     for(x=1; x+1<width2; x+=2){
1034         A3= temp[x+width2];
1035         A4= temp[x+1     ];
1036         A3 -= (A2 + A4)>>1;
1037         A2 += (A1 + A3 + 2)>>2;
1038         b[x+width2] = A3;
1039         b[x       ] = A2;
1040
1041         A1= temp[x+1+width2];
1042         A2= temp[x+2       ];
1043         A1 -= (A2 + A4)>>1;
1044         A4 += (A1 + A3 + 2)>>2;
1045         b[x+1+width2] = A1;
1046         b[x+1       ] = A4;
1047     }
1048     A3= temp[width-1];
1049     A3 -= A2;
1050     A2 += (A1 + A3 + 2)>>2;
1051     b[width -1] = A3;
1052     b[width2-1] = A2;
1053 #else
1054     inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1055     inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1056 #endif /* 0 */
1057     for(x=0; x<width2; x++){
1058         b[2*x    ]= temp[x   ];
1059         b[2*x + 1]= temp[x+w2];
1060     }
1061     if(width&1)
1062         b[2*x    ]= temp[x   ];
1063 }
1064
1065 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1066     int i;
1067
1068     for(i=0; i<width; i++){
1069         b1[i] += (b0[i] + b2[i])>>1;
1070     }
1071 }
1072
1073 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1074     int i;
1075
1076     for(i=0; i<width; i++){
1077         b1[i] -= (b0[i] + b2[i] + 2)>>2;
1078     }
1079 }
1080
1081 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1082     cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1083     cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1084     cs->y = -1;
1085 }
1086
1087 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1088     cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1089     cs->b1 = buffer + mirror(-1  , height-1)*stride;
1090     cs->y = -1;
1091 }
1092
1093 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1094     int y= cs->y;
1095
1096     IDWTELEM *b0= cs->b0;
1097     IDWTELEM *b1= cs->b1;
1098     IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1099     IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1100
1101         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1102         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1103
1104         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1105         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1106
1107     cs->b0 = b2;
1108     cs->b1 = b3;
1109     cs->y += 2;
1110 }
1111
1112 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1113     int y= cs->y;
1114     IDWTELEM *b0= cs->b0;
1115     IDWTELEM *b1= cs->b1;
1116     IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1117     IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1118
1119         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1120         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1121
1122         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1123         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1124
1125     cs->b0 = b2;
1126     cs->b1 = b3;
1127     cs->y += 2;
1128 }
1129
1130 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1131     dwt_compose_t cs;
1132     spatial_compose53i_init(&cs, buffer, height, stride);
1133     while(cs.y <= height)
1134         spatial_compose53i_dy(&cs, buffer, width, height, stride);
1135 }
1136
1137
1138 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1139     IDWTELEM temp[width];
1140     const int w2= (width+1)>>1;
1141
1142     inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1143     inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1144     inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1145     inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1146 }
1147
1148 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1149     int i;
1150
1151     for(i=0; i<width; i++){
1152         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1153     }
1154 }
1155
1156 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1157     int i;
1158
1159     for(i=0; i<width; i++){
1160         b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1161     }
1162 }
1163
1164 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1165     int i;
1166
1167     for(i=0; i<width; i++){
1168 #ifdef liftS
1169         b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1170 #else
1171         b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1172 #endif
1173     }
1174 }
1175
1176 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1177     int i;
1178
1179     for(i=0; i<width; i++){
1180         b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1181     }
1182 }
1183
1184 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1185     int i;
1186
1187     for(i=0; i<width; i++){
1188         b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1189         b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1190 #ifdef liftS
1191         b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1192 #else
1193         b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1194 #endif
1195         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1196     }
1197 }
1198
1199 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1200     cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1201     cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1202     cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1203     cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1204     cs->y = -3;
1205 }
1206
1207 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1208     cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1209     cs->b1 = buffer + mirror(-3  , height-1)*stride;
1210     cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1211     cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1212     cs->y = -3;
1213 }
1214
1215 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1216     int y = cs->y;
1217
1218     IDWTELEM *b0= cs->b0;
1219     IDWTELEM *b1= cs->b1;
1220     IDWTELEM *b2= cs->b2;
1221     IDWTELEM *b3= cs->b3;
1222     IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1223     IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1224
1225     if(y>0 && y+4<height){
1226         dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1227     }else{
1228         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1229         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1230         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1231         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1232     }
1233
1234         if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1235         if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1236
1237     cs->b0=b2;
1238     cs->b1=b3;
1239     cs->b2=b4;
1240     cs->b3=b5;
1241     cs->y += 2;
1242 }
1243
1244 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1245     int y = cs->y;
1246     IDWTELEM *b0= cs->b0;
1247     IDWTELEM *b1= cs->b1;
1248     IDWTELEM *b2= cs->b2;
1249     IDWTELEM *b3= cs->b3;
1250     IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1251     IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1252
1253         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1254         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1255         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1256         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1257
1258         if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1259         if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1260
1261     cs->b0=b2;
1262     cs->b1=b3;
1263     cs->b2=b4;
1264     cs->b3=b5;
1265     cs->y += 2;
1266 }
1267
1268 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1269     dwt_compose_t cs;
1270     spatial_compose97i_init(&cs, buffer, height, stride);
1271     while(cs.y <= height)
1272         spatial_compose97i_dy(&cs, buffer, width, height, stride);
1273 }
1274
1275 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1276     int level;
1277     for(level=decomposition_count-1; level>=0; level--){
1278         switch(type){
1279         case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1280         case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1281         }
1282     }
1283 }
1284
1285 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1286     int level;
1287     for(level=decomposition_count-1; level>=0; level--){
1288         switch(type){
1289         case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1290         case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1291         }
1292     }
1293 }
1294
1295 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1296     const int support = type==1 ? 3 : 5;
1297     int level;
1298     if(type==2) return;
1299
1300     for(level=decomposition_count-1; level>=0; level--){
1301         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1302             switch(type){
1303             case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1304                 break;
1305             case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1306                 break;
1307             }
1308         }
1309     }
1310 }
1311
1312 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1313     const int support = type==1 ? 3 : 5;
1314     int level;
1315     if(type==2) return;
1316
1317     for(level=decomposition_count-1; level>=0; level--){
1318         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1319             switch(type){
1320             case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1321                 break;
1322             case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1323                 break;
1324             }
1325         }
1326     }
1327 }
1328
1329 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1330         dwt_compose_t cs[MAX_DECOMPOSITIONS];
1331         int y;
1332         ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1333         for(y=0; y<height; y+=4)
1334             ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1335 }
1336
1337 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1338     const int w= b->width;
1339     const int h= b->height;
1340     int x, y;
1341
1342     if(1){
1343         int run=0;
1344         int runs[w*h];
1345         int run_index=0;
1346         int max_index;
1347
1348         for(y=0; y<h; y++){
1349             for(x=0; x<w; x++){
1350                 int v, p=0;
1351                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1352                 v= src[x + y*stride];
1353
1354                 if(y){
1355                     t= src[x + (y-1)*stride];
1356                     if(x){
1357                         lt= src[x - 1 + (y-1)*stride];
1358                     }
1359                     if(x + 1 < w){
1360                         rt= src[x + 1 + (y-1)*stride];
1361                     }
1362                 }
1363                 if(x){
1364                     l= src[x - 1 + y*stride];
1365                     /*if(x > 1){
1366                         if(orientation==1) ll= src[y + (x-2)*stride];
1367                         else               ll= src[x - 2 + y*stride];
1368                     }*/
1369                 }
1370                 if(parent){
1371                     int px= x>>1;
1372                     int py= y>>1;
1373                     if(px<b->parent->width && py<b->parent->height)
1374                         p= parent[px + py*2*stride];
1375                 }
1376                 if(!(/*ll|*/l|lt|t|rt|p)){
1377                     if(v){
1378                         runs[run_index++]= run;
1379                         run=0;
1380                     }else{
1381                         run++;
1382                     }
1383                 }
1384             }
1385         }
1386         max_index= run_index;
1387         runs[run_index++]= run;
1388         run_index=0;
1389         run= runs[run_index++];
1390
1391         put_symbol2(&s->c, b->state[30], max_index, 0);
1392         if(run_index <= max_index)
1393             put_symbol2(&s->c, b->state[1], run, 3);
1394
1395         for(y=0; y<h; y++){
1396             if(s->c.bytestream_end - s->c.bytestream < w*40){
1397                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1398                 return -1;
1399             }
1400             for(x=0; x<w; x++){
1401                 int v, p=0;
1402                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1403                 v= src[x + y*stride];
1404
1405                 if(y){
1406                     t= src[x + (y-1)*stride];
1407                     if(x){
1408                         lt= src[x - 1 + (y-1)*stride];
1409                     }
1410                     if(x + 1 < w){
1411                         rt= src[x + 1 + (y-1)*stride];
1412                     }
1413                 }
1414                 if(x){
1415                     l= src[x - 1 + y*stride];
1416                     /*if(x > 1){
1417                         if(orientation==1) ll= src[y + (x-2)*stride];
1418                         else               ll= src[x - 2 + y*stride];
1419                     }*/
1420                 }
1421                 if(parent){
1422                     int px= x>>1;
1423                     int py= y>>1;
1424                     if(px<b->parent->width && py<b->parent->height)
1425                         p= parent[px + py*2*stride];
1426                 }
1427                 if(/*ll|*/l|lt|t|rt|p){
1428                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1429
1430                     put_rac(&s->c, &b->state[0][context], !!v);
1431                 }else{
1432                     if(!run){
1433                         run= runs[run_index++];
1434
1435                         if(run_index <= max_index)
1436                             put_symbol2(&s->c, b->state[1], run, 3);
1437                         assert(v);
1438                     }else{
1439                         run--;
1440                         assert(!v);
1441                     }
1442                 }
1443                 if(v){
1444                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1445                     int l2= 2*FFABS(l) + (l<0);
1446                     int t2= 2*FFABS(t) + (t<0);
1447
1448                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1449                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1450                 }
1451             }
1452         }
1453     }
1454     return 0;
1455 }
1456
1457 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1458 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
1459 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
1460     return encode_subband_c0run(s, b, src, parent, stride, orientation);
1461 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
1462 }
1463
1464 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1465     const int w= b->width;
1466     const int h= b->height;
1467     int x,y;
1468
1469     if(1){
1470         int run, runs;
1471         x_and_coeff *xc= b->x_coeff;
1472         x_and_coeff *prev_xc= NULL;
1473         x_and_coeff *prev2_xc= xc;
1474         x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1475         x_and_coeff *prev_parent_xc= parent_xc;
1476
1477         runs= get_symbol2(&s->c, b->state[30], 0);
1478         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1479         else           run= INT_MAX;
1480
1481         for(y=0; y<h; y++){
1482             int v=0;
1483             int lt=0, t=0, rt=0;
1484
1485             if(y && prev_xc->x == 0){
1486                 rt= prev_xc->coeff;
1487             }
1488             for(x=0; x<w; x++){
1489                 int p=0;
1490                 const int l= v;
1491
1492                 lt= t; t= rt;
1493
1494                 if(y){
1495                     if(prev_xc->x <= x)
1496                         prev_xc++;
1497                     if(prev_xc->x == x + 1)
1498                         rt= prev_xc->coeff;
1499                     else
1500                         rt=0;
1501                 }
1502                 if(parent_xc){
1503                     if(x>>1 > parent_xc->x){
1504                         parent_xc++;
1505                     }
1506                     if(x>>1 == parent_xc->x){
1507                         p= parent_xc->coeff;
1508                     }
1509                 }
1510                 if(/*ll|*/l|lt|t|rt|p){
1511                     int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1512
1513                     v=get_rac(&s->c, &b->state[0][context]);
1514                     if(v){
1515                         v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1516                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1517
1518                         xc->x=x;
1519                         (xc++)->coeff= v;
1520                     }
1521                 }else{
1522                     if(!run){
1523                         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1524                         else           run= INT_MAX;
1525                         v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1526                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1527
1528                         xc->x=x;
1529                         (xc++)->coeff= v;
1530                     }else{
1531                         int max_run;
1532                         run--;
1533                         v=0;
1534
1535                         if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1536                         else  max_run= FFMIN(run, w-x-1);
1537                         if(parent_xc)
1538                             max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1539                         x+= max_run;
1540                         run-= max_run;
1541                     }
1542                 }
1543             }
1544             (xc++)->x= w+1; //end marker
1545             prev_xc= prev2_xc;
1546             prev2_xc= xc;
1547
1548             if(parent_xc){
1549                 if(y&1){
1550                     while(parent_xc->x != parent->width+1)
1551                         parent_xc++;
1552                     parent_xc++;
1553                     prev_parent_xc= parent_xc;
1554                 }else{
1555                     parent_xc= prev_parent_xc;
1556                 }
1557             }
1558         }
1559
1560         (xc++)->x= w+1; //end marker
1561     }
1562 }
1563
1564 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1565     const int w= b->width;
1566     int y;
1567     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1568     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1569     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1570     int new_index = 0;
1571
1572     if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1573         qadd= 0;
1574         qmul= 1<<QEXPSHIFT;
1575     }
1576
1577     /* If we are on the second or later slice, restore our index. */
1578     if (start_y != 0)
1579         new_index = save_state[0];
1580
1581
1582     for(y=start_y; y<h; y++){
1583         int x = 0;
1584         int v;
1585         IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1586         memset(line, 0, b->width*sizeof(IDWTELEM));
1587         v = b->x_coeff[new_index].coeff;
1588         x = b->x_coeff[new_index++].x;
1589         while(x < w){
1590             register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1591             register int u= -(v&1);
1592             line[x] = (t^u) - u;
1593
1594             v = b->x_coeff[new_index].coeff;
1595             x = b->x_coeff[new_index++].x;
1596         }
1597     }
1598
1599     /* Save our variables for the next slice. */
1600     save_state[0] = new_index;
1601
1602     return;
1603 }
1604
1605 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1606     int plane_index, level, orientation;
1607
1608     for(plane_index=0; plane_index<3; plane_index++){
1609         for(level=0; level<MAX_DECOMPOSITIONS; level++){
1610             for(orientation=level ? 1:0; orientation<4; orientation++){
1611                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1612             }
1613         }
1614     }
1615     memset(s->header_state, MID_STATE, sizeof(s->header_state));
1616     memset(s->block_state, MID_STATE, sizeof(s->block_state));
1617 }
1618
1619 static int alloc_blocks(SnowContext *s){
1620     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1621     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1622
1623     s->b_width = w;
1624     s->b_height= h;
1625
1626     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1627     return 0;
1628 }
1629
1630 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1631     uint8_t *bytestream= d->bytestream;
1632     uint8_t *bytestream_start= d->bytestream_start;
1633     *d= *s;
1634     d->bytestream= bytestream;
1635     d->bytestream_start= bytestream_start;
1636 }
1637
1638 //near copy & paste from dsputil, FIXME
1639 static int pix_sum(uint8_t * pix, int line_size, int w)
1640 {
1641     int s, i, j;
1642
1643     s = 0;
1644     for (i = 0; i < w; i++) {
1645         for (j = 0; j < w; j++) {
1646             s += pix[0];
1647             pix ++;
1648         }
1649         pix += line_size - w;
1650     }
1651     return s;
1652 }
1653
1654 //near copy & paste from dsputil, FIXME
1655 static int pix_norm1(uint8_t * pix, int line_size, int w)
1656 {
1657     int s, i, j;
1658     uint32_t *sq = ff_squareTbl + 256;
1659
1660     s = 0;
1661     for (i = 0; i < w; i++) {
1662         for (j = 0; j < w; j ++) {
1663             s += sq[pix[0]];
1664             pix ++;
1665         }
1666         pix += line_size - w;
1667     }
1668     return s;
1669 }
1670
1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1672     const int w= s->b_width << s->block_max_depth;
1673     const int rem_depth= s->block_max_depth - level;
1674     const int index= (x + y*w) << rem_depth;
1675     const int block_w= 1<<rem_depth;
1676     BlockNode block;
1677     int i,j;
1678
1679     block.color[0]= l;
1680     block.color[1]= cb;
1681     block.color[2]= cr;
1682     block.mx= mx;
1683     block.my= my;
1684     block.ref= ref;
1685     block.type= type;
1686     block.level= level;
1687
1688     for(j=0; j<block_w; j++){
1689         for(i=0; i<block_w; i++){
1690             s->block[index + i + j*w]= block;
1691         }
1692     }
1693 }
1694
1695 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1696     const int offset[3]= {
1697           y*c->  stride + x,
1698         ((y*c->uvstride + x)>>1),
1699         ((y*c->uvstride + x)>>1),
1700     };
1701     int i;
1702     for(i=0; i<3; i++){
1703         c->src[0][i]= src [i];
1704         c->ref[0][i]= ref [i] + offset[i];
1705     }
1706     assert(!ref_index);
1707 }
1708
1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1710                            const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1711     if(s->ref_frames == 1){
1712         *mx = mid_pred(left->mx, top->mx, tr->mx);
1713         *my = mid_pred(left->my, top->my, tr->my);
1714     }else{
1715         const int *scale = scale_mv_ref[ref];
1716         *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1717                        (top ->mx * scale[top ->ref] + 128) >>8,
1718                        (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1719         *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1720                        (top ->my * scale[top ->ref] + 128) >>8,
1721                        (tr  ->my * scale[tr  ->ref] + 128) >>8);
1722     }
1723 }
1724
1725 //FIXME copy&paste
1726 #define P_LEFT P[1]
1727 #define P_TOP P[2]
1728 #define P_TOPRIGHT P[3]
1729 #define P_MEDIAN P[4]
1730 #define P_MV1 P[9]
1731 #define FLAG_QPEL   1 //must be 1
1732
1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1734     uint8_t p_buffer[1024];
1735     uint8_t i_buffer[1024];
1736     uint8_t p_state[sizeof(s->block_state)];
1737     uint8_t i_state[sizeof(s->block_state)];
1738     RangeCoder pc, ic;
1739     uint8_t *pbbak= s->c.bytestream;
1740     uint8_t *pbbak_start= s->c.bytestream_start;
1741     int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1742     const int w= s->b_width  << s->block_max_depth;
1743     const int h= s->b_height << s->block_max_depth;
1744     const int rem_depth= s->block_max_depth - level;
1745     const int index= (x + y*w) << rem_depth;
1746     const int block_w= 1<<(LOG2_MB_SIZE - level);
1747     int trx= (x+1)<<rem_depth;
1748     int try= (y+1)<<rem_depth;
1749     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1750     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1751     const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1752     const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1753     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1754     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1755     int pl = left->color[0];
1756     int pcb= left->color[1];
1757     int pcr= left->color[2];
1758     int pmx, pmy;
1759     int mx=0, my=0;
1760     int l,cr,cb;
1761     const int stride= s->current_picture.linesize[0];
1762     const int uvstride= s->current_picture.linesize[1];
1763     uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1764                                 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1765                                 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1766     int P[10][2];
1767     int16_t last_mv[3][2];
1768     int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1769     const int shift= 1+qpel;
1770     MotionEstContext *c= &s->m.me;
1771     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1772     int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1773     int my_context= av_log2(2*FFABS(left->my - top->my));
1774     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1775     int ref, best_ref, ref_score, ref_mx, ref_my;
1776
1777     assert(sizeof(s->block_state) >= 256);
1778     if(s->keyframe){
1779         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1780         return 0;
1781     }
1782
1783 //    clip predictors / edge ?
1784
1785     P_LEFT[0]= left->mx;
1786     P_LEFT[1]= left->my;
1787     P_TOP [0]= top->mx;
1788     P_TOP [1]= top->my;
1789     P_TOPRIGHT[0]= tr->mx;
1790     P_TOPRIGHT[1]= tr->my;
1791
1792     last_mv[0][0]= s->block[index].mx;
1793     last_mv[0][1]= s->block[index].my;
1794     last_mv[1][0]= right->mx;
1795     last_mv[1][1]= right->my;
1796     last_mv[2][0]= bottom->mx;
1797     last_mv[2][1]= bottom->my;
1798
1799     s->m.mb_stride=2;
1800     s->m.mb_x=
1801     s->m.mb_y= 0;
1802     c->skip= 0;
1803
1804     assert(c->  stride ==   stride);
1805     assert(c->uvstride == uvstride);
1806
1807     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1808     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1809     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1810     c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1811
1812     c->xmin = - x*block_w - 16+2;
1813     c->ymin = - y*block_w - 16+2;
1814     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1815     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1816
1817     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1818     if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1819     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1820     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1821     if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1822     if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1823     if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1824
1825     P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1826     P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1827
1828     if (!y) {
1829         c->pred_x= P_LEFT[0];
1830         c->pred_y= P_LEFT[1];
1831     } else {
1832         c->pred_x = P_MEDIAN[0];
1833         c->pred_y = P_MEDIAN[1];
1834     }
1835
1836     score= INT_MAX;
1837     best_ref= 0;
1838     for(ref=0; ref<s->ref_frames; ref++){
1839         init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1840
1841         ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1842                                          (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1843
1844         assert(ref_mx >= c->xmin);
1845         assert(ref_mx <= c->xmax);
1846         assert(ref_my >= c->ymin);
1847         assert(ref_my <= c->ymax);
1848
1849         ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1850         ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1851         ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1852         if(s->ref_mvs[ref]){
1853             s->ref_mvs[ref][index][0]= ref_mx;
1854             s->ref_mvs[ref][index][1]= ref_my;
1855             s->ref_scores[ref][index]= ref_score;
1856         }
1857         if(score > ref_score){
1858             score= ref_score;
1859             best_ref= ref;
1860             mx= ref_mx;
1861             my= ref_my;
1862         }
1863     }
1864     //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1865
1866   //  subpel search
1867     base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1868     pc= s->c;
1869     pc.bytestream_start=
1870     pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1871     memcpy(p_state, s->block_state, sizeof(s->block_state));
1872
1873     if(level!=s->block_max_depth)
1874         put_rac(&pc, &p_state[4 + s_context], 1);
1875     put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1876     if(s->ref_frames > 1)
1877         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1878     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1879     put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1880     put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1881     p_len= pc.bytestream - pc.bytestream_start;
1882     score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1883
1884     block_s= block_w*block_w;
1885     sum = pix_sum(current_data[0], stride, block_w);
1886     l= (sum + block_s/2)/block_s;
1887     iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1888
1889     block_s= block_w*block_w>>2;
1890     sum = pix_sum(current_data[1], uvstride, block_w>>1);
1891     cb= (sum + block_s/2)/block_s;
1892 //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1893     sum = pix_sum(current_data[2], uvstride, block_w>>1);
1894     cr= (sum + block_s/2)/block_s;
1895 //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1896
1897     ic= s->c;
1898     ic.bytestream_start=
1899     ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1900     memcpy(i_state, s->block_state, sizeof(s->block_state));
1901     if(level!=s->block_max_depth)
1902         put_rac(&ic, &i_state[4 + s_context], 1);
1903     put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1904     put_symbol(&ic, &i_state[32],  l-pl , 1);
1905     put_symbol(&ic, &i_state[64], cb-pcb, 1);
1906     put_symbol(&ic, &i_state[96], cr-pcr, 1);
1907     i_len= ic.bytestream - ic.bytestream_start;
1908     iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1909
1910 //    assert(score==256*256*256*64-1);
1911     assert(iscore < 255*255*256 + s->lambda2*10);
1912     assert(iscore >= 0);
1913     assert(l>=0 && l<=255);
1914     assert(pl>=0 && pl<=255);
1915
1916     if(level==0){
1917         int varc= iscore >> 8;
1918         int vard= score >> 8;
1919         if (vard <= 64 || vard < varc)
1920             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1921         else
1922             c->scene_change_score+= s->m.qscale;
1923     }
1924
1925     if(level!=s->block_max_depth){
1926         put_rac(&s->c, &s->block_state[4 + s_context], 0);
1927         score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1928         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1929         score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1930         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1931         score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1932
1933         if(score2 < score && score2 < iscore)
1934             return score2;
1935     }
1936
1937     if(iscore < score){
1938         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1939         memcpy(pbbak, i_buffer, i_len);
1940         s->c= ic;
1941         s->c.bytestream_start= pbbak_start;
1942         s->c.bytestream= pbbak + i_len;
1943         set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1944         memcpy(s->block_state, i_state, sizeof(s->block_state));
1945         return iscore;
1946     }else{
1947         memcpy(pbbak, p_buffer, p_len);
1948         s->c= pc;
1949         s->c.bytestream_start= pbbak_start;
1950         s->c.bytestream= pbbak + p_len;
1951         set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1952         memcpy(s->block_state, p_state, sizeof(s->block_state));
1953         return score;
1954     }
1955 }
1956
1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1958     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1959         return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1960     }else{
1961         return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1962     }
1963 }
1964
1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1966     const int w= s->b_width  << s->block_max_depth;
1967     const int rem_depth= s->block_max_depth - level;
1968     const int index= (x + y*w) << rem_depth;
1969     int trx= (x+1)<<rem_depth;
1970     BlockNode *b= &s->block[index];
1971     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1972     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1973     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1974     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1975     int pl = left->color[0];
1976     int pcb= left->color[1];
1977     int pcr= left->color[2];
1978     int pmx, pmy;
1979     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1980     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1981     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1982     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1983
1984     if(s->keyframe){
1985         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1986         return;
1987     }
1988
1989     if(level!=s->block_max_depth){
1990         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1991             put_rac(&s->c, &s->block_state[4 + s_context], 1);
1992         }else{
1993             put_rac(&s->c, &s->block_state[4 + s_context], 0);
1994             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1995             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1996             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1997             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
1998             return;
1999         }
2000     }
2001     if(b->type & BLOCK_INTRA){
2002         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2003         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2004         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2005         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2006         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2007         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2008     }else{
2009         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2010         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2011         if(s->ref_frames > 1)
2012             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2013         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2014         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2015         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2016     }
2017 }
2018
2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2020     const int w= s->b_width << s->block_max_depth;
2021     const int rem_depth= s->block_max_depth - level;
2022     const int index= (x + y*w) << rem_depth;
2023     int trx= (x+1)<<rem_depth;
2024     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2025     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2026     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2027     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2028     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2029
2030     if(s->keyframe){
2031         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2032         return;
2033     }
2034
2035     if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2036         int type, mx, my;
2037         int l = left->color[0];
2038         int cb= left->color[1];
2039         int cr= left->color[2];
2040         int ref = 0;
2041         int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2042         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2043         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2044
2045         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2046
2047         if(type){
2048             pred_mv(s, &mx, &my, 0, left, top, tr);
2049             l += get_symbol(&s->c, &s->block_state[32], 1);
2050             cb+= get_symbol(&s->c, &s->block_state[64], 1);
2051             cr+= get_symbol(&s->c, &s->block_state[96], 1);
2052         }else{
2053             if(s->ref_frames > 1)
2054                 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2055             pred_mv(s, &mx, &my, ref, left, top, tr);
2056             mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2057             my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2058         }
2059         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2060     }else{
2061         decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2062         decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2063         decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2064         decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2065     }
2066 }
2067
2068 static void encode_blocks(SnowContext *s, int search){
2069     int x, y;
2070     int w= s->b_width;
2071     int h= s->b_height;
2072
2073     if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2074         iterative_me(s);
2075
2076     for(y=0; y<h; y++){
2077         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2078             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2079             return;
2080         }
2081         for(x=0; x<w; x++){
2082             if(s->avctx->me_method == ME_ITER || !search)
2083                 encode_q_branch2(s, 0, x, y);
2084             else
2085                 encode_q_branch (s, 0, x, y);
2086         }
2087     }
2088 }
2089
2090 static void decode_blocks(SnowContext *s){
2091     int x, y;
2092     int w= s->b_width;
2093     int h= s->b_height;
2094
2095     for(y=0; y<h; y++){
2096         for(x=0; x<w; x++){
2097             decode_q_branch(s, 0, x, y);
2098         }
2099     }
2100 }
2101
2102 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2103     const static uint8_t weight[64]={
2104     8,7,6,5,4,3,2,1,
2105     7,7,0,0,0,0,0,1,
2106     6,0,6,0,0,0,2,0,
2107     5,0,0,5,0,3,0,0,
2108     4,0,0,0,4,0,0,0,
2109     3,0,0,5,0,3,0,0,
2110     2,0,6,0,0,0,2,0,
2111     1,7,0,0,0,0,0,1,
2112     };
2113
2114     const static uint8_t brane[256]={
2115     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2116     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2117     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2118     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2119     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2120     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2121     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2122     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2123     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2124     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2125     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2126     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2127     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2128     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2129     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2130     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2131     };
2132
2133     const static uint8_t needs[16]={
2134     0,1,0,0,
2135     2,4,2,0,
2136     0,1,0,0,
2137     15
2138     };
2139
2140     int x, y, b, r, l;
2141     int16_t tmpIt   [64*(32+HTAPS_MAX)];
2142     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2143     int16_t *tmpI= tmpIt;
2144     uint8_t *tmp2= tmp2t[0];
2145     const uint8_t *hpel[11];
2146     assert(dx<16 && dy<16);
2147     r= brane[dx + 16*dy]&15;
2148     l= brane[dx + 16*dy]>>4;
2149
2150     b= needs[l] | needs[r];
2151     if(p && !p->diag_mc)
2152         b= 15;
2153
2154     if(b&5){
2155         for(y=0; y < b_h+HTAPS_MAX-1; y++){
2156             for(x=0; x < b_w; x++){
2157                 int a_1=src[x + HTAPS_MAX/2-4];
2158                 int a0= src[x + HTAPS_MAX/2-3];
2159                 int a1= src[x + HTAPS_MAX/2-2];
2160                 int a2= src[x + HTAPS_MAX/2-1];
2161                 int a3= src[x + HTAPS_MAX/2+0];
2162                 int a4= src[x + HTAPS_MAX/2+1];
2163                 int a5= src[x + HTAPS_MAX/2+2];
2164                 int a6= src[x + HTAPS_MAX/2+3];
2165                 int am=0;
2166                 if(!p || p->fast_mc){
2167                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2168                     tmpI[x]= am;
2169                     am= (am+16)>>5;
2170                 }else{
2171                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2172                     tmpI[x]= am;
2173                     am= (am+32)>>6;
2174                 }
2175
2176                 if(am&(~255)) am= ~(am>>31);
2177                 tmp2[x]= am;
2178             }
2179             tmpI+= 64;
2180             tmp2+= stride;
2181             src += stride;
2182         }
2183         src -= stride*y;
2184     }
2185     src += HTAPS_MAX/2 - 1;
2186     tmp2= tmp2t[1];
2187
2188     if(b&2){
2189         for(y=0; y < b_h; y++){
2190             for(x=0; x < b_w+1; x++){
2191                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2192                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2193                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2194                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2195                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2196                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2197                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2198                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2199                 int am=0;
2200                 if(!p || p->fast_mc)
2201                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2202                 else
2203                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2204
2205                 if(am&(~255)) am= ~(am>>31);
2206                 tmp2[x]= am;
2207             }
2208             src += stride;
2209             tmp2+= stride;
2210         }
2211         src -= stride*y;
2212     }
2213     src += stride*(HTAPS_MAX/2 - 1);
2214     tmp2= tmp2t[2];
2215     tmpI= tmpIt;
2216     if(b&4){
2217         for(y=0; y < b_h; y++){
2218             for(x=0; x < b_w; x++){
2219                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2220                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2221                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2222                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2223                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2224                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2225                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2226                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2227                 int am=0;
2228                 if(!p || p->fast_mc)
2229                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2230                 else
2231                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2232                 if(am&(~255)) am= ~(am>>31);
2233                 tmp2[x]= am;
2234             }
2235             tmpI+= 64;
2236             tmp2+= stride;
2237         }
2238     }
2239
2240     hpel[ 0]= src;
2241     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2242     hpel[ 2]= src + 1;
2243
2244     hpel[ 4]= tmp2t[1];
2245     hpel[ 5]= tmp2t[2];
2246     hpel[ 6]= tmp2t[1] + 1;
2247
2248     hpel[ 8]= src + stride;
2249     hpel[ 9]= hpel[1] + stride;
2250     hpel[10]= hpel[8] + 1;
2251
2252     if(b==15){
2253         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2254         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2255         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2256         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2257         dx&=7;
2258         dy&=7;
2259         for(y=0; y < b_h; y++){
2260             for(x=0; x < b_w; x++){
2261                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2262                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2263             }
2264             src1+=stride;
2265             src2+=stride;
2266             src3+=stride;
2267             src4+=stride;
2268             dst +=stride;
2269         }
2270     }else{
2271         const uint8_t *src1= hpel[l];
2272         const uint8_t *src2= hpel[r];
2273         int a= weight[((dx&7) + (8*(dy&7)))];
2274         int b= 8-a;
2275         for(y=0; y < b_h; y++){
2276             for(x=0; x < b_w; x++){
2277                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2278             }
2279             src1+=stride;
2280             src2+=stride;
2281             dst +=stride;
2282         }
2283     }
2284 }
2285
2286 #define mca(dx,dy,b_w)\
2287 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2288     uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2289     assert(h==b_w);\
2290     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2291 }
2292
2293 mca( 0, 0,16)
2294 mca( 8, 0,16)
2295 mca( 0, 8,16)
2296 mca( 8, 8,16)
2297 mca( 0, 0,8)
2298 mca( 8, 0,8)
2299 mca( 0, 8,8)
2300 mca( 8, 8,8)
2301
2302 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2303     if(block->type & BLOCK_INTRA){
2304         int x, y;
2305         const int color = block->color[plane_index];
2306         const int color4= color*0x01010101;
2307         if(b_w==32){
2308             for(y=0; y < b_h; y++){
2309                 *(uint32_t*)&dst[0 + y*stride]= color4;
2310                 *(uint32_t*)&dst[4 + y*stride]= color4;
2311                 *(uint32_t*)&dst[8 + y*stride]= color4;
2312                 *(uint32_t*)&dst[12+ y*stride]= color4;
2313                 *(uint32_t*)&dst[16+ y*stride]= color4;
2314                 *(uint32_t*)&dst[20+ y*stride]= color4;
2315                 *(uint32_t*)&dst[24+ y*stride]= color4;
2316                 *(uint32_t*)&dst[28+ y*stride]= color4;
2317             }
2318         }else if(b_w==16){
2319             for(y=0; y < b_h; y++){
2320                 *(uint32_t*)&dst[0 + y*stride]= color4;
2321                 *(uint32_t*)&dst[4 + y*stride]= color4;
2322                 *(uint32_t*)&dst[8 + y*stride]= color4;
2323                 *(uint32_t*)&dst[12+ y*stride]= color4;
2324             }
2325         }else if(b_w==8){
2326             for(y=0; y < b_h; y++){
2327                 *(uint32_t*)&dst[0 + y*stride]= color4;
2328                 *(uint32_t*)&dst[4 + y*stride]= color4;
2329             }
2330         }else if(b_w==4){
2331             for(y=0; y < b_h; y++){
2332                 *(uint32_t*)&dst[0 + y*stride]= color4;
2333             }
2334         }else{
2335             for(y=0; y < b_h; y++){
2336                 for(x=0; x < b_w; x++){
2337                     dst[x + y*stride]= color;
2338                 }
2339             }
2340         }
2341     }else{
2342         uint8_t *src= s->last_picture[block->ref].data[plane_index];
2343         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2344         int mx= block->mx*scale;
2345         int my= block->my*scale;
2346         const int dx= mx&15;
2347         const int dy= my&15;
2348         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2349         sx += (mx>>4) - (HTAPS_MAX/2-1);
2350         sy += (my>>4) - (HTAPS_MAX/2-1);
2351         src += sx + sy*stride;
2352         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2353            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2354             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2355             src= tmp + MB_SIZE;
2356         }
2357 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2358 //        assert(!(b_w&(b_w-1)));
2359         assert(b_w>1 && b_h>1);
2360         assert((tab_index>=0 && tab_index<4) || b_w==32);
2361         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2362             mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2363         else if(b_w==32){
2364             int y;
2365             for(y=0; y<b_h; y+=16){
2366                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2367                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2368             }
2369         }else if(b_w==b_h)
2370             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2371         else if(b_w==2*b_h){
2372             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2373             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2374         }else{
2375             assert(2*b_w==b_h);
2376             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2377             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2378         }
2379     }
2380 }
2381
2382 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2383                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2384     int y, x;
2385     IDWTELEM * dst;
2386     for(y=0; y<b_h; y++){
2387         //FIXME ugly misuse of obmc_stride
2388         const uint8_t *obmc1= obmc + y*obmc_stride;
2389         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2390         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2391         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2392         dst = slice_buffer_get_line(sb, src_y + y);
2393         for(x=0; x<b_w; x++){
2394             int v=   obmc1[x] * block[3][x + y*src_stride]
2395                     +obmc2[x] * block[2][x + y*src_stride]
2396                     +obmc3[x] * block[1][x + y*src_stride]
2397                     +obmc4[x] * block[0][x + y*src_stride];
2398
2399             v <<= 8 - LOG2_OBMC_MAX;
2400             if(FRAC_BITS != 8){
2401                 v >>= 8 - FRAC_BITS;
2402             }
2403             if(add){
2404                 v += dst[x + src_x];
2405                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2406                 if(v&(~255)) v= ~(v>>31);
2407                 dst8[x + y*src_stride] = v;
2408             }else{
2409                 dst[x + src_x] -= v;
2410             }
2411         }
2412     }
2413 }
2414
2415 //FIXME name cleanup (b_w, block_w, b_width stuff)
2416 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2417     const int b_width = s->b_width  << s->block_max_depth;
2418     const int b_height= s->b_height << s->block_max_depth;
2419     const int b_stride= b_width;
2420     BlockNode *lt= &s->block[b_x + b_y*b_stride];
2421     BlockNode *rt= lt+1;
2422     BlockNode *lb= lt+b_stride;
2423     BlockNode *rb= lb+1;
2424     uint8_t *block[4];
2425     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2426     uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2427     uint8_t *ptmp;
2428     int x,y;
2429
2430     if(b_x<0){
2431         lt= rt;
2432         lb= rb;
2433     }else if(b_x + 1 >= b_width){
2434         rt= lt;
2435         rb= lb;
2436     }
2437     if(b_y<0){
2438         lt= lb;
2439         rt= rb;
2440     }else if(b_y + 1 >= b_height){
2441         lb= lt;
2442         rb= rt;
2443     }
2444
2445     if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2446         obmc -= src_x;
2447         b_w += src_x;
2448         if(!sliced && !offset_dst)
2449             dst -= src_x;
2450         src_x=0;
2451     }else if(src_x + b_w > w){
2452         b_w = w - src_x;
2453     }
2454     if(src_y<0){
2455         obmc -= src_y*obmc_stride;
2456         b_h += src_y;
2457         if(!sliced && !offset_dst)
2458             dst -= src_y*dst_stride;
2459         src_y=0;
2460     }else if(src_y + b_h> h){
2461         b_h = h - src_y;
2462     }
2463
2464     if(b_w<=0 || b_h<=0) return;
2465
2466     assert(src_stride > 2*MB_SIZE + 5);
2467
2468     if(!sliced && offset_dst)
2469         dst += src_x + src_y*dst_stride;
2470     dst8+= src_x + src_y*src_stride;
2471 //    src += src_x + src_y*src_stride;
2472
2473     ptmp= tmp + 3*tmp_step;
2474     block[0]= ptmp;
2475     ptmp+=tmp_step;
2476     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2477
2478     if(same_block(lt, rt)){
2479         block[1]= block[0];
2480     }else{
2481         block[1]= ptmp;
2482         ptmp+=tmp_step;
2483         pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2484     }
2485
2486     if(same_block(lt, lb)){
2487         block[2]= block[0];
2488     }else if(same_block(rt, lb)){
2489         block[2]= block[1];
2490     }else{
2491         block[2]= ptmp;
2492         ptmp+=tmp_step;
2493         pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2494     }
2495
2496     if(same_block(lt, rb) ){
2497         block[3]= block[0];
2498     }else if(same_block(rt, rb)){
2499         block[3]= block[1];
2500     }else if(same_block(lb, rb)){
2501         block[3]= block[2];
2502     }else{
2503         block[3]= ptmp;
2504         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2505     }
2506 #if 0
2507     for(y=0; y<b_h; y++){
2508         for(x=0; x<b_w; x++){
2509             int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2510             if(add) dst[x + y*dst_stride] += v;
2511             else    dst[x + y*dst_stride] -= v;
2512         }
2513     }
2514     for(y=0; y<b_h; y++){
2515         uint8_t *obmc2= obmc + (obmc_stride>>1);
2516         for(x=0; x<b_w; x++){
2517             int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2518             if(add) dst[x + y*dst_stride] += v;
2519             else    dst[x + y*dst_stride] -= v;
2520         }
2521     }
2522     for(y=0; y<b_h; y++){
2523         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2524         for(x=0; x<b_w; x++){
2525             int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2526             if(add) dst[x + y*dst_stride] += v;
2527             else    dst[x + y*dst_stride] -= v;
2528         }
2529     }
2530     for(y=0; y<b_h; y++){
2531         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2532         uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2533         for(x=0; x<b_w; x++){
2534             int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2535             if(add) dst[x + y*dst_stride] += v;
2536             else    dst[x + y*dst_stride] -= v;
2537         }
2538     }
2539 #else
2540     if(sliced){
2541         s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2542     }else{
2543         for(y=0; y<b_h; y++){
2544             //FIXME ugly misuse of obmc_stride
2545             const uint8_t *obmc1= obmc + y*obmc_stride;
2546             const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2547             const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2548             const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2549             for(x=0; x<b_w; x++){
2550                 int v=   obmc1[x] * block[3][x + y*src_stride]
2551                         +obmc2[x] * block[2][x + y*src_stride]
2552                         +obmc3[x] * block[1][x + y*src_stride]
2553                         +obmc4[x] * block[0][x + y*src_stride];
2554
2555                 v <<= 8 - LOG2_OBMC_MAX;
2556                 if(FRAC_BITS != 8){
2557                     v >>= 8 - FRAC_BITS;
2558                 }
2559                 if(add){
2560                     v += dst[x + y*dst_stride];
2561                     v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2562                     if(v&(~255)) v= ~(v>>31);
2563                     dst8[x + y*src_stride] = v;
2564                 }else{
2565                     dst[x + y*dst_stride] -= v;
2566                 }
2567             }
2568         }
2569     }
2570 #endif /* 0 */
2571 }
2572
2573 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2574     Plane *p= &s->plane[plane_index];
2575     const int mb_w= s->b_width  << s->block_max_depth;
2576     const int mb_h= s->b_height << s->block_max_depth;
2577     int x, y, mb_x;
2578     int block_size = MB_SIZE >> s->block_max_depth;
2579     int block_w    = plane_index ? block_size/2 : block_size;
2580     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2581     int obmc_stride= plane_index ? block_size : 2*block_size;
2582     int ref_stride= s->current_picture.linesize[plane_index];
2583     uint8_t *dst8= s->current_picture.data[plane_index];
2584     int w= p->width;
2585     int h= p->height;
2586
2587     if(s->keyframe || (s->avctx->debug&512)){
2588         if(mb_y==mb_h)
2589             return;
2590
2591         if(add){
2592             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2593 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2594                 IDWTELEM * line = sb->line[y];
2595                 for(x=0; x<w; x++){
2596 //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2597                     int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2598                     v >>= FRAC_BITS;
2599                     if(v&(~255)) v= ~(v>>31);
2600                     dst8[x + y*ref_stride]= v;
2601                 }
2602             }
2603         }else{
2604             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2605 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2606                 IDWTELEM * line = sb->line[y];
2607                 for(x=0; x<w; x++){
2608                     line[x] -= 128 << FRAC_BITS;
2609 //                    buf[x + y*w]-= 128<<FRAC_BITS;
2610                 }
2611             }
2612         }
2613
2614         return;
2615     }
2616
2617     for(mb_x=0; mb_x<=mb_w; mb_x++){
2618         add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2619                    block_w*mb_x - block_w/2,
2620                    block_w*mb_y - block_w/2,
2621                    block_w, block_w,
2622                    w, h,
2623                    w, ref_stride, obmc_stride,
2624                    mb_x - 1, mb_y - 1,
2625                    add, 0, plane_index);
2626     }
2627 }
2628
2629 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2630     Plane *p= &s->plane[plane_index];
2631     const int mb_w= s->b_width  << s->block_max_depth;
2632     const int mb_h= s->b_height << s->block_max_depth;
2633     int x, y, mb_x;
2634     int block_size = MB_SIZE >> s->block_max_depth;
2635     int block_w    = plane_index ? block_size/2 : block_size;
2636     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2637     const int obmc_stride= plane_index ? block_size : 2*block_size;
2638     int ref_stride= s->current_picture.linesize[plane_index];
2639     uint8_t *dst8= s->current_picture.data[plane_index];
2640     int w= p->width;
2641     int h= p->height;
2642
2643     if(s->keyframe || (s->avctx->debug&512)){
2644         if(mb_y==mb_h)
2645             return;
2646
2647         if(add){
2648             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2649                 for(x=0; x<w; x++){
2650                     int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2651                     v >>= FRAC_BITS;
2652                     if(v&(~255)) v= ~(v>>31);
2653                     dst8[x + y*ref_stride]= v;
2654                 }
2655             }
2656         }else{
2657             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2658                 for(x=0; x<w; x++){
2659                     buf[x + y*w]-= 128<<FRAC_BITS;
2660                 }
2661             }
2662         }
2663
2664         return;
2665     }
2666
2667     for(mb_x=0; mb_x<=mb_w; mb_x++){
2668         add_yblock(s, 0, NULL, buf, dst8, obmc,
2669                    block_w*mb_x - block_w/2,
2670                    block_w*mb_y - block_w/2,
2671                    block_w, block_w,
2672                    w, h,
2673                    w, ref_stride, obmc_stride,
2674                    mb_x - 1, mb_y - 1,
2675                    add, 1, plane_index);
2676     }
2677 }
2678
2679 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2680     const int mb_h= s->b_height << s->block_max_depth;
2681     int mb_y;
2682     for(mb_y=0; mb_y<=mb_h; mb_y++)
2683         predict_slice(s, buf, plane_index, add, mb_y);
2684 }
2685
2686 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2687     int i, x2, y2;
2688     Plane *p= &s->plane[plane_index];
2689     const int block_size = MB_SIZE >> s->block_max_depth;
2690     const int block_w    = plane_index ? block_size/2 : block_size;
2691     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2692     const int obmc_stride= plane_index ? block_size : 2*block_size;
2693     const int ref_stride= s->current_picture.linesize[plane_index];
2694     uint8_t *src= s-> input_picture.data[plane_index];
2695     IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2696     const int b_stride = s->b_width << s->block_max_depth;
2697     const int w= p->width;
2698     const int h= p->height;
2699     int index= mb_x + mb_y*b_stride;
2700     BlockNode *b= &s->block[index];
2701     BlockNode backup= *b;
2702     int ab=0;
2703     int aa=0;
2704
2705     b->type|= BLOCK_INTRA;
2706     b->color[plane_index]= 0;
2707     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2708
2709     for(i=0; i<4; i++){
2710         int mb_x2= mb_x + (i &1) - 1;
2711         int mb_y2= mb_y + (i>>1) - 1;
2712         int x= block_w*mb_x2 + block_w/2;
2713         int y= block_w*mb_y2 + block_w/2;
2714
2715         add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2716                     x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2717
2718         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2719             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2720                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2721                 int obmc_v= obmc[index];
2722                 int d;
2723                 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2724                 if(x<0) obmc_v += obmc[index + block_w];
2725                 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2726                 if(x+block_w>w) obmc_v += obmc[index - block_w];
2727                 //FIXME precalculate this or simplify it somehow else
2728
2729                 d = -dst[index] + (1<<(FRAC_BITS-1));
2730                 dst[index] = d;
2731                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2732                 aa += obmc_v * obmc_v; //FIXME precalculate this
2733             }
2734         }
2735     }
2736     *b= backup;
2737
2738     return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2739 }
2740
2741 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2742     const int b_stride = s->b_width << s->block_max_depth;
2743     const int b_height = s->b_height<< s->block_max_depth;
2744     int index= x + y*b_stride;
2745     const BlockNode *b     = &s->block[index];
2746     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2747     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2748     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2749     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2750     int dmx, dmy;
2751 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2752 //  int my_context= av_log2(2*FFABS(left->my - top->my));
2753
2754     if(x<0 || x>=b_stride || y>=b_height)
2755         return 0;
2756 /*
2757 1            0      0
2758 01X          1-2    1
2759 001XX        3-6    2-3
2760 0001XXX      7-14   4-7
2761 00001XXXX   15-30   8-15
2762 */
2763 //FIXME try accurate rate
2764 //FIXME intra and inter predictors if surrounding blocks are not the same type
2765     if(b->type & BLOCK_INTRA){
2766         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2767                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
2768                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
2769     }else{
2770         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2771         dmx-= b->mx;
2772         dmy-= b->my;
2773         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2774                     + av_log2(2*FFABS(dmy))
2775                     + av_log2(2*b->ref));
2776     }
2777 }
2778
2779 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2780     Plane *p= &s->plane[plane_index];
2781     const int block_size = MB_SIZE >> s->block_max_depth;
2782     const int block_w    = plane_index ? block_size/2 : block_size;
2783     const int obmc_stride= plane_index ? block_size : 2*block_size;
2784     const int ref_stride= s->current_picture.linesize[plane_index];
2785     uint8_t *dst= s->current_picture.data[plane_index];
2786     uint8_t *src= s->  input_picture.data[plane_index];
2787     IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2788     uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2789     uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2790     const int b_stride = s->b_width << s->block_max_depth;
2791     const int b_height = s->b_height<< s->block_max_depth;
2792     const int w= p->width;
2793     const int h= p->height;
2794     int distortion;
2795     int rate= 0;
2796     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2797     int sx= block_w*mb_x - block_w/2;
2798     int sy= block_w*mb_y - block_w/2;
2799     int x0= FFMAX(0,-sx);
2800     int y0= FFMAX(0,-sy);
2801     int x1= FFMIN(block_w*2, w-sx);
2802     int y1= FFMIN(block_w*2, h-sy);
2803     int i,x,y;
2804
2805     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2806
2807     for(y=y0; y<y1; y++){
2808         const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2809         const IDWTELEM *pred1 = pred + y*obmc_stride;
2810         uint8_t *cur1 = cur + y*ref_stride;
2811         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2812         for(x=x0; x<x1; x++){
2813 #if FRAC_BITS >= LOG2_OBMC_MAX
2814             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2815 #else
2816             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2817 #endif
2818             v = (v + pred1[x]) >> FRAC_BITS;
2819             if(v&(~255)) v= ~(v>>31);
2820             dst1[x] = v;
2821         }
2822     }
2823
2824     /* copy the regions where obmc[] = (uint8_t)256 */
2825     if(LOG2_OBMC_MAX == 8
2826         && (mb_x == 0 || mb_x == b_stride-1)
2827         && (mb_y == 0 || mb_y == b_height-1)){
2828         if(mb_x == 0)
2829             x1 = block_w;
2830         else
2831             x0 = block_w;
2832         if(mb_y == 0)
2833             y1 = block_w;
2834         else
2835             y0 = block_w;
2836         for(y=y0; y<y1; y++)
2837             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2838     }
2839
2840     if(block_w==16){
2841         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2842         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2843         /* FIXME cmps overlap but do not cover the wavelet's whole support.
2844          * So improving the score of one block is not strictly guaranteed
2845          * to improve the score of the whole frame, thus iterative motion
2846          * estimation does not always converge. */
2847         if(s->avctx->me_cmp == FF_CMP_W97)
2848             distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2849         else if(s->avctx->me_cmp == FF_CMP_W53)
2850             distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2851         else{
2852             distortion = 0;
2853             for(i=0; i<4; i++){
2854                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2855                 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2856             }
2857         }
2858     }else{
2859         assert(block_w==8);
2860         distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2861     }
2862
2863     if(plane_index==0){
2864         for(i=0; i<4; i++){
2865 /* ..RRr
2866  * .RXx.
2867  * rxx..
2868  */
2869             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2870         }
2871         if(mb_x == b_stride-2)
2872             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2873     }
2874     return distortion + rate*penalty_factor;
2875 }
2876
2877 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2878     int i, y2;
2879     Plane *p= &s->plane[plane_index];
2880     const int block_size = MB_SIZE >> s->block_max_depth;
2881     const int block_w    = plane_index ? block_size/2 : block_size;
2882     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2883     const int obmc_stride= plane_index ? block_size : 2*block_size;
2884     const int ref_stride= s->current_picture.linesize[plane_index];
2885     uint8_t *dst= s->current_picture.data[plane_index];
2886     uint8_t *src= s-> input_picture.data[plane_index];
2887     //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2888     // const has only been removed from zero_dst to suppress a warning
2889     static IDWTELEM zero_dst[4096]; //FIXME
2890     const int b_stride = s->b_width << s->block_max_depth;
2891     const int w= p->width;
2892     const int h= p->height;
2893     int distortion= 0;
2894     int rate= 0;
2895     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2896
2897     for(i=0; i<9; i++){
2898         int mb_x2= mb_x + (i%3) - 1;
2899         int mb_y2= mb_y + (i/3) - 1;
2900         int x= block_w*mb_x2 + block_w/2;
2901         int y= block_w*mb_y2 + block_w/2;
2902
2903         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2904                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2905
2906         //FIXME find a cleaner/simpler way to skip the outside stuff
2907         for(y2= y; y2<0; y2++)
2908             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2909         for(y2= h; y2<y+block_w; y2++)
2910             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2911         if(x<0){
2912             for(y2= y; y2<y+block_w; y2++)
2913                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2914         }
2915         if(x+block_w > w){
2916             for(y2= y; y2<y+block_w; y2++)
2917                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2918         }
2919
2920         assert(block_w== 8 || block_w==16);
2921         distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2922     }
2923
2924     if(plane_index==0){
2925         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2926         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2927
2928 /* ..RRRr
2929  * .RXXx.
2930  * .RXXx.
2931  * rxxx.
2932  */
2933         if(merged)
2934             rate = get_block_bits(s, mb_x, mb_y, 2);
2935         for(i=merged?4:0; i<9; i++){
2936             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2937             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2938         }
2939     }
2940     return distortion + rate*penalty_factor;
2941 }
2942
2943 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2944     const int b_stride= s->b_width << s->block_max_depth;
2945     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2946     BlockNode backup= *block;
2947     int rd, index, value;
2948
2949     assert(mb_x>=0 && mb_y>=0);
2950     assert(mb_x<b_stride);
2951
2952     if(intra){
2953         block->color[0] = p[0];
2954         block->color[1] = p[1];
2955         block->color[2] = p[2];
2956         block->type |= BLOCK_INTRA;
2957     }else{
2958         index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2959         value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2960         if(s->me_cache[index] == value)
2961             return 0;
2962         s->me_cache[index]= value;
2963
2964         block->mx= p[0];
2965         block->my= p[1];
2966         block->type &= ~BLOCK_INTRA;
2967     }
2968
2969     rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2970
2971 //FIXME chroma
2972     if(rd < *best_rd){
2973         *best_rd= rd;
2974         return 1;
2975     }else{
2976         *block= backup;
2977         return 0;
2978     }
2979 }
2980
2981 /* special case for int[2] args we discard afterwards,
2982  * fixes compilation problem with gcc 2.95 */
2983 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2984     int p[2] = {p0, p1};
2985     return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2986 }
2987
2988 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2989     const int b_stride= s->b_width << s->block_max_depth;
2990     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2991     BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2992     int rd, index, value;
2993
2994     assert(mb_x>=0 && mb_y>=0);
2995     assert(mb_x<b_stride);
2996     assert(((mb_x|mb_y)&1) == 0);
2997
2998     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2999     value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3000     if(s->me_cache[index] == value)
3001         return 0;
3002     s->me_cache[index]= value;
3003
3004     block->mx= p0;
3005     block->my= p1;
3006     block->ref= ref;
3007     block->type &= ~BLOCK_INTRA;
3008     block[1]= block[b_stride]= block[b_stride+1]= *block;
3009
3010     rd= get_4block_rd(s, mb_x, mb_y, 0);
3011
3012 //FIXME chroma
3013     if(rd < *best_rd){
3014         *best_rd= rd;
3015         return 1;
3016     }else{
3017         block[0]= backup[0];
3018         block[1]= backup[1];
3019         block[b_stride]= backup[2];
3020         block[b_stride+1]= backup[3];
3021         return 0;
3022     }
3023 }
3024
3025 static void iterative_me(SnowContext *s){
3026     int pass, mb_x, mb_y;
3027     const int b_width = s->b_width  << s->block_max_depth;
3028     const int b_height= s->b_height << s->block_max_depth;
3029     const int b_stride= b_width;
3030     int color[3];
3031
3032     {
3033         RangeCoder r = s->c;
3034         uint8_t state[sizeof(s->block_state)];
3035         memcpy(state, s->block_state, sizeof(s->block_state));
3036         for(mb_y= 0; mb_y<s->b_height; mb_y++)
3037             for(mb_x= 0; mb_x<s->b_width; mb_x++)
3038                 encode_q_branch(s, 0, mb_x, mb_y);
3039         s->c = r;
3040         memcpy(s->block_state, state, sizeof(s->block_state));
3041     }
3042
3043     for(pass=0; pass<25; pass++){
3044         int change= 0;
3045
3046         for(mb_y= 0; mb_y<b_height; mb_y++){
3047             for(mb_x= 0; mb_x<b_width; mb_x++){
3048                 int dia_change, i, j, ref;
3049                 int best_rd= INT_MAX, ref_rd;
3050                 BlockNode backup, ref_b;
3051                 const int index= mb_x + mb_y * b_stride;
3052                 BlockNode *block= &s->block[index];
3053                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3054                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3055                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3056                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3057                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3058                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3059                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3060                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3061                 const int b_w= (MB_SIZE >> s->block_max_depth);
3062                 uint8_t obmc_edged[b_w*2][b_w*2];
3063
3064                 if(pass && (block->type & BLOCK_OPT))
3065                     continue;
3066                 block->type |= BLOCK_OPT;
3067
3068                 backup= *block;
3069
3070                 if(!s->me_cache_generation)
3071                     memset(s->me_cache, 0, sizeof(s->me_cache));
3072                 s->me_cache_generation += 1<<22;
3073
3074                 //FIXME precalculate
3075                 {
3076                     int x, y;
3077                     memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3078                     if(mb_x==0)
3079                         for(y=0; y<b_w*2; y++)
3080                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3081                     if(mb_x==b_stride-1)
3082                         for(y=0; y<b_w*2; y++)
3083                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3084                     if(mb_y==0){
3085                         for(x=0; x<b_w*2; x++)
3086                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
3087                         for(y=1; y<b_w; y++)
3088                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3089                     }
3090                     if(mb_y==b_height-1){
3091                         for(x=0; x<b_w*2; x++)
3092                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3093                         for(y=b_w; y<b_w*2-1; y++)
3094                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3095                     }
3096                 }
3097
3098                 //skip stuff outside the picture
3099                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3100                     uint8_t *src= s->  input_picture.data[0];
3101                     uint8_t *dst= s->current_picture.data[0];
3102                     const int stride= s->current_picture.linesize[0];
3103                     const int block_w= MB_SIZE >> s->block_max_depth;
3104                     const int sx= block_w*mb_x - block_w/2;
3105                     const int sy= block_w*mb_y - block_w/2;
3106                     const int w= s->plane[0].width;
3107                     const int h= s->plane[0].height;
3108                     int y;
3109
3110                     for(y=sy; y<0; y++)
3111                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3112                     for(y=h; y<sy+block_w*2; y++)
3113                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3114                     if(sx<0){
3115                         for(y=sy; y<sy+block_w*2; y++)
3116                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3117                     }
3118                     if(sx+block_w*2 > w){
3119                         for(y=sy; y<sy+block_w*2; y++)
3120                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3121                     }
3122                 }
3123
3124                 // intra(black) = neighbors' contribution to the current block
3125                 for(i=0; i<3; i++)
3126                     color[i]= get_dc(s, mb_x, mb_y, i);
3127
3128                 // get previous score (cannot be cached due to OBMC)
3129                 if(pass > 0 && (block->type&BLOCK_INTRA)){
3130                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
3131                     check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3132                 }else
3133                     check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3134
3135                 ref_b= *block;
3136                 ref_rd= best_rd;
3137                 for(ref=0; ref < s->ref_frames; ref++){
3138                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3139                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3140                         continue;
3141                     block->ref= ref;
3142                     best_rd= INT_MAX;
3143
3144                     check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3145                     check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3146                     if(tb)
3147                         check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3148                     if(lb)
3149                         check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3150                     if(rb)
3151                         check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3152                     if(bb)
3153                         check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3154
3155                     /* fullpel ME */
3156                     //FIXME avoid subpel interpolation / round to nearest integer
3157                     do{
3158                         dia_change=0;
3159                         for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3160                             for(j=0; j<i; j++){
3161                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3162                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3163                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3164                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3165                             }
3166                         }
3167                     }while(dia_change);
3168                     /* subpel ME */
3169                     do{
3170                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3171                         dia_change=0;
3172                         for(i=0; i<8; i++)
3173                             dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3174                     }while(dia_change);
3175                     //FIXME or try the standard 2 pass qpel or similar
3176
3177                     mvr[0][0]= block->mx;
3178                     mvr[0][1]= block->my;
3179                     if(ref_rd > best_rd){
3180                         ref_rd= best_rd;
3181                         ref_b= *block;
3182                     }
3183                 }
3184                 best_rd= ref_rd;
3185                 *block= ref_b;
3186 #if 1
3187                 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3188                 //FIXME RD style color selection
3189 #endif
3190                 if(!same_block(block, &backup)){
3191                     if(tb ) tb ->type &= ~BLOCK_OPT;
3192                     if(lb ) lb ->type &= ~BLOCK_OPT;
3193                     if(rb ) rb ->type &= ~BLOCK_OPT;
3194                     if(bb ) bb ->type &= ~BLOCK_OPT;
3195                     if(tlb) tlb->type &= ~BLOCK_OPT;
3196                     if(trb) trb->type &= ~BLOCK_OPT;
3197                     if(blb) blb->type &= ~BLOCK_OPT;
3198                     if(brb) brb->type &= ~BLOCK_OPT;
3199                     change ++;
3200                 }
3201             }
3202         }
3203         av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3204         if(!change)
3205             break;
3206     }
3207
3208     if(s->block_max_depth == 1){
3209         int change= 0;
3210         for(mb_y= 0; mb_y<b_height; mb_y+=2){
3211             for(mb_x= 0; mb_x<b_width; mb_x+=2){
3212                 int i;
3213                 int best_rd, init_rd;
3214                 const int index= mb_x + mb_y * b_stride;
3215                 BlockNode *b[4];
3216
3217                 b[0]= &s->block[index];
3218                 b[1]= b[0]+1;
3219                 b[2]= b[0]+b_stride;
3220                 b[3]= b[2]+1;
3221                 if(same_block(b[0], b[1]) &&
3222                    same_block(b[0], b[2]) &&
3223                    same_block(b[0], b[3]))
3224                     continue;
3225
3226                 if(!s->me_cache_generation)
3227                     memset(s->me_cache, 0, sizeof(s->me_cache));
3228                 s->me_cache_generation += 1<<22;
3229
3230                 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3231
3232                 //FIXME more multiref search?
3233                 check_4block_inter(s, mb_x, mb_y,
3234                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3235                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3236
3237                 for(i=0; i<4; i++)
3238                     if(!(b[i]->type&BLOCK_INTRA))
3239                         check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3240
3241                 if(init_rd != best_rd)
3242                     change++;
3243             }
3244         }
3245         av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3246     }
3247 }
3248
3249 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3250     const int w= b->width;
3251     const int h= b->height;
3252     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3253     const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3254     int x,y, thres1, thres2;
3255
3256     if(s->qlog == LOSSLESS_QLOG){
3257         for(y=0; y<h; y++)
3258             for(x=0; x<w; x++)
3259                 dst[x + y*stride]= src[x + y*stride];
3260         return;
3261     }
3262
3263     bias= bias ? 0 : (3*qmul)>>3;
3264     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3265     thres2= 2*thres1;
3266
3267     if(!bias){
3268         for(y=0; y<h; y++){
3269             for(x=0; x<w; x++){
3270                 int i= src[x + y*stride];
3271
3272                 if((unsigned)(i+thres1) > thres2){
3273                     if(i>=0){
3274                         i<<= QEXPSHIFT;
3275                         i/= qmul; //FIXME optimize
3276                         dst[x + y*stride]=  i;
3277                     }else{
3278                         i= -i;
3279                         i<<= QEXPSHIFT;
3280                         i/= qmul; //FIXME optimize
3281                         dst[x + y*stride]= -i;
3282                     }
3283                 }else
3284                     dst[x + y*stride]= 0;
3285             }
3286         }
3287     }else{
3288         for(y=0; y<h; y++){
3289             for(x=0; x<w; x++){
3290                 int i= src[x + y*stride];
3291
3292                 if((unsigned)(i+thres1) > thres2){
3293                     if(i>=0){
3294                         i<<= QEXPSHIFT;
3295                         i= (i + bias) / qmul; //FIXME optimize
3296                         dst[x + y*stride]=  i;
3297                     }else{
3298                         i= -i;
3299                         i<<= QEXPSHIFT;
3300                         i= (i + bias) / qmul; //FIXME optimize
3301                         dst[x + y*stride]= -i;
3302                     }
3303                 }else
3304                     dst[x + y*stride]= 0;
3305             }
3306         }
3307     }
3308 }
3309
3310 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3311     const int w= b->width;
3312     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3313     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3314     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3315     int x,y;
3316
3317     if(s->qlog == LOSSLESS_QLOG) return;
3318
3319     for(y=start_y; y<end_y; y++){
3320 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3321         IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3322         for(x=0; x<w; x++){
3323             int i= line[x];
3324             if(i<0){
3325                 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3326             }else if(i>0){
3327                 line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3328             }
3329         }
3330     }
3331 }
3332
3333 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3334     const int w= b->width;
3335     const int h= b->height;
3336     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3337     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3338     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3339     int x,y;
3340
3341     if(s->qlog == LOSSLESS_QLOG) return;
3342
3343     for(y=0; y<h; y++){
3344         for(x=0; x<w; x++){
3345             int i= src[x + y*stride];
3346             if(i<0){
3347                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3348             }else if(i>0){
3349                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3350             }
3351         }
3352     }
3353 }
3354
3355 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3356     const int w= b->width;
3357     const int h= b->height;
3358     int x,y;
3359
3360     for(y=h-1; y>=0; y--){
3361         for(x=w-1; x>=0; x--){
3362             int i= x + y*stride;
3363
3364             if(x){
3365                 if(use_median){
3366                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3367                     else  src[i] -= src[i - 1];
3368                 }else{
3369                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3370                     else  src[i] -= src[i - 1];
3371                 }
3372             }else{
3373                 if(y) src[i] -= src[i - stride];
3374             }
3375         }
3376     }
3377 }
3378
3379 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3380     const int w= b->width;
3381     int x,y;
3382
3383     IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3384     IDWTELEM * prev;
3385
3386     if (start_y != 0)
3387         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3388
3389     for(y=start_y; y<end_y; y++){
3390         prev = line;
3391 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3392         line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3393         for(x=0; x<w; x++){
3394             if(x){
3395                 if(use_median){
3396                     if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3397                     else  line[x] += line[x - 1];
3398                 }else{
3399                     if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3400                     else  line[x] += line[x - 1];
3401                 }
3402             }else{
3403                 if(y) line[x] += prev[x];
3404             }
3405         }
3406     }
3407 }
3408
3409 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3410     const int w= b->width;
3411     const int h= b->height;
3412     int x,y;
3413
3414     for(y=0; y<h; y++){
3415         for(x=0; x<w; x++){
3416             int i= x + y*stride;
3417
3418             if(x){
3419                 if(use_median){
3420                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3421                     else  src[i] += src[i - 1];
3422                 }else{
3423                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3424                     else  src[i] += src[i - 1];
3425                 }
3426             }else{
3427                 if(y) src[i] += src[i - stride];
3428             }
3429         }
3430     }
3431 }
3432
3433 static void encode_qlogs(SnowContext *s){
3434     int plane_index, level, orientation;
3435
3436     for(plane_index=0; plane_index<2; plane_index++){
3437         for(level=0; level<s->spatial_decomposition_count; level++){
3438             for(orientation=level ? 1:0; orientation<4; orientation++){
3439                 if(orientation==2) continue;
3440                 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3441             }
3442         }
3443     }
3444 }
3445
3446 static void encode_header(SnowContext *s){
3447     int plane_index, i;
3448     uint8_t kstate[32];
3449
3450     memset(kstate, MID_STATE, sizeof(kstate));
3451
3452     put_rac(&s->c, kstate, s->keyframe);
3453     if(s->keyframe || s->always_reset){
3454         reset_contexts(s);
3455         s->last_spatial_decomposition_type=
3456         s->last_qlog=
3457         s->last_qbias=
3458         s->last_mv_scale=
3459         s->last_block_max_depth= 0;
3460         for(plane_index=0; plane_index<2; plane_index++){
3461             Plane *p= &s->plane[plane_index];
3462             p->last_htaps=0;
3463             p->last_diag_mc=0;
3464             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3465         }
3466     }
3467     if(s->keyframe){
3468         put_symbol(&s->c, s->header_state, s->version, 0);
3469         put_rac(&s->c, s->header_state, s->always_reset);
3470         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3471         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3472         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3473         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3474         put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3475         put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3476         put_rac(&s->c, s->header_state, s->spatial_scalability);
3477 //        put_rac(&s->c, s->header_state, s->rate_scalability);
3478         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3479
3480         encode_qlogs(s);
3481     }
3482
3483     if(!s->keyframe){
3484         int update_mc=0;
3485         for(plane_index=0; plane_index<2; plane_index++){
3486             Plane *p= &s->plane[plane_index];
3487             update_mc |= p->last_htaps   != p->htaps;
3488             update_mc |= p->last_diag_mc != p->diag_mc;
3489             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3490         }
3491         put_rac(&s->c, s->header_state, update_mc);
3492         if(update_mc){
3493             for(plane_index=0; plane_index<2; plane_index++){
3494                 Plane *p= &s->plane[plane_index];
3495                 put_rac(&s->c, s->header_state, p->diag_mc);
3496                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3497                 for(i= p->htaps/2; i; i--)
3498                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3499             }
3500         }
3501         if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3502             put_rac(&s->c, s->header_state, 1);
3503             put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3504             encode_qlogs(s);
3505         }else
3506             put_rac(&s->c, s->header_state, 0);
3507     }
3508
3509     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3510     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3511     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3512     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3513     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3514
3515 }
3516
3517 static void update_last_header_values(SnowContext *s){
3518     int plane_index;
3519
3520     if(!s->keyframe){
3521         for(plane_index=0; plane_index<2; plane_index++){
3522             Plane *p= &s->plane[plane_index];
3523             p->last_diag_mc= p->diag_mc;
3524             p->last_htaps  = p->htaps;
3525             memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3526         }
3527     }
3528
3529     s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3530     s->last_qlog                        = s->qlog;
3531     s->last_qbias                       = s->qbias;
3532     s->last_mv_scale                    = s->mv_scale;
3533     s->last_block_max_depth             = s->block_max_depth;
3534     s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3535 }
3536
3537 static void decode_qlogs(SnowContext *s){
3538     int plane_index, level, orientation;
3539
3540     for(plane_index=0; plane_index<3; plane_index++){
3541         for(level=0; level<s->spatial_decomposition_count; level++){
3542             for(orientation=level ? 1:0; orientation<4; orientation++){
3543                 int q;
3544                 if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3545                 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3546                 else                    q= get_symbol(&s->c, s->header_state, 1);
3547                 s->plane[plane_index].band[level][orientation].qlog= q;
3548             }
3549         }
3550     }
3551 }
3552
3553 static int decode_header(SnowContext *s){
3554     int plane_index;
3555     uint8_t kstate[32];
3556
3557     memset(kstate, MID_STATE, sizeof(kstate));
3558
3559     s->keyframe= get_rac(&s->c, kstate);
3560     if(s->keyframe || s->always_reset){
3561         reset_contexts(s);
3562         s->spatial_decomposition_type=
3563         s->qlog=
3564         s->qbias=
3565         s->mv_scale=
3566         s->block_max_depth= 0;
3567     }
3568     if(s->keyframe){
3569         s->version= get_symbol(&s->c, s->header_state, 0);
3570         if(s->version>0){
3571             av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3572             return -1;
3573         }
3574         s->always_reset= get_rac(&s->c, s->header_state);
3575         s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3576         s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3577         s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3578         s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3579         s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3580         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3581         s->spatial_scalability= get_rac(&s->c, s->header_state);
3582 //        s->rate_scalability= get_rac(&s->c, s->header_state);
3583         s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3584
3585         decode_qlogs(s);
3586     }
3587
3588     if(!s->keyframe){
3589         if(get_rac(&s->c, s->header_state)){
3590             for(plane_index=0; plane_index<2; plane_index++){
3591                 int htaps, i, sum=0;
3592                 Plane *p= &s->plane[plane_index];
3593                 p->diag_mc= get_rac(&s->c, s->header_state);
3594                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3595                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3596                     return -1;
3597                 p->htaps= htaps;
3598                 for(i= htaps/2; i; i--){
3599                     p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3600                     sum += p->hcoeff[i];
3601                 }
3602                 p->hcoeff[0]= 32-sum;
3603             }
3604             s->plane[2].diag_mc= s->plane[1].diag_mc;
3605             s->plane[2].htaps  = s->plane[1].htaps;
3606             memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3607         }
3608         if(get_rac(&s->c, s->header_state)){
3609             s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3610             decode_qlogs(s);
3611         }
3612     }
3613
3614     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3615     if(s->spatial_decomposition_type > 1){
3616         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3617         return -1;
3618     }
3619
3620     s->qlog           += get_symbol(&s->c, s->header_state, 1);
3621     s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3622     s->qbias          += get_symbol(&s->c, s->header_state, 1);
3623     s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3624     if(s->block_max_depth > 1 || s->block_max_depth < 0){
3625         av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3626         s->block_max_depth= 0;
3627         return -1;
3628     }
3629
3630     return 0;
3631 }
3632
3633 static void init_qexp(void){
3634     int i;
3635     double v=128;
3636
3637     for(i=0; i<QROOT; i++){
3638         qexp[i]= lrintf(v);
3639         v *= pow(2, 1.0 / QROOT);
3640     }
3641 }
3642
3643 static av_cold int common_init(AVCodecContext *avctx){
3644     SnowContext *s = avctx->priv_data;
3645     int width, height;
3646     int i, j;
3647
3648     s->avctx= avctx;
3649
3650     dsputil_init(&s->dsp, avctx);
3651
3652 #define mcf(dx,dy)\
3653     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3654     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3655         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3656     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3657     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3658         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3659
3660     mcf( 0, 0)
3661     mcf( 4, 0)
3662     mcf( 8, 0)
3663     mcf(12, 0)
3664     mcf( 0, 4)
3665     mcf( 4, 4)
3666     mcf( 8, 4)
3667     mcf(12, 4)
3668     mcf( 0, 8)
3669     mcf( 4, 8)
3670     mcf( 8, 8)
3671     mcf(12, 8)
3672     mcf( 0,12)
3673     mcf( 4,12)
3674     mcf( 8,12)
3675     mcf(12,12)
3676
3677 #define mcfh(dx,dy)\
3678     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3679     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3680         mc_block_hpel ## dx ## dy ## 16;\
3681     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3682     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3683         mc_block_hpel ## dx ## dy ## 8;
3684
3685     mcfh(0, 0)
3686     mcfh(8, 0)
3687     mcfh(0, 8)
3688     mcfh(8, 8)
3689
3690     if(!qexp[0])
3691         init_qexp();
3692
3693 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3694
3695     width= s->avctx->width;
3696     height= s->avctx->height;
3697
3698     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3699     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3700
3701     for(i=0; i<MAX_REF_FRAMES; i++)
3702         for(j=0; j<MAX_REF_FRAMES; j++)
3703             scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3704
3705     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3706
3707     return 0;
3708 }
3709
3710 static int common_init_after_header(AVCodecContext *avctx){
3711     SnowContext *s = avctx->priv_data;
3712     int plane_index, level, orientation;
3713
3714     for(plane_index=0; plane_index<3; plane_index++){
3715         int w= s->avctx->width;
3716         int h= s->avctx->height;
3717
3718         if(plane_index){
3719             w>>= s->chroma_h_shift;
3720             h>>= s->chroma_v_shift;
3721         }
3722         s->plane[plane_index].width = w;
3723         s->plane[plane_index].height= h;
3724
3725         for(level=s->spatial_decomposition_count-1; level>=0; level--){
3726             for(orientation=level ? 1 : 0; orientation<4; orientation++){
3727                 SubBand *b= &s->plane[plane_index].band[level][orientation];
3728
3729                 b->buf= s->spatial_dwt_buffer;
3730                 b->level= level;
3731                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3732                 b->width = (w + !(orientation&1))>>1;
3733                 b->height= (h + !(orientation>1))>>1;
3734
3735                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3736                 b->buf_x_offset = 0;
3737                 b->buf_y_offset = 0;
3738
3739                 if(orientation&1){
3740                     b->buf += (w+1)>>1;
3741                     b->buf_x_offset = (w+1)>>1;
3742                 }
3743                 if(orientation>1){
3744                     b->buf += b->stride>>1;
3745                     b->buf_y_offset = b->stride_line >> 1;
3746                 }
3747                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3748
3749                 if(level)
3750                     b->parent= &s->plane[plane_index].band[level-1][orientation];
3751                 //FIXME avoid this realloc
3752                 av_freep(&b->x_coeff);
3753                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3754             }
3755             w= (w+1)>>1;
3756             h= (h+1)>>1;
3757         }
3758     }
3759
3760     return 0;
3761 }
3762
3763 static int qscale2qlog(int qscale){
3764     return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3765            + 61*QROOT/8; //<64 >60
3766 }
3767
3768 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3769 {
3770     /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3771      * FIXME we know exact mv bits at this point,
3772      * but ratecontrol isn't set up to include them. */
3773     uint32_t coef_sum= 0;
3774     int level, orientation, delta_qlog;
3775
3776     for(level=0; level<s->spatial_decomposition_count; level++){
3777         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3778             SubBand *b= &s->plane[0].band[level][orientation];
3779             IDWTELEM *buf= b->ibuf;
3780             const int w= b->width;
3781             const int h= b->height;
3782             const int stride= b->stride;
3783             const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3784             const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3785             const int qdiv= (1<<16)/qmul;
3786             int x, y;
3787             //FIXME this is ugly
3788             for(y=0; y<h; y++)
3789                 for(x=0; x<w; x++)
3790                     buf[x+y*stride]= b->buf[x+y*stride];
3791             if(orientation==0)
3792                 decorrelate(s, b, buf, stride, 1, 0);
3793             for(y=0; y<h; y++)
3794                 for(x=0; x<w; x++)
3795                     coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3796         }
3797     }
3798
3799     /* ugly, ratecontrol just takes a sqrt again */
3800     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3801     assert(coef_sum < INT_MAX);
3802
3803     if(pict->pict_type == FF_I_TYPE){
3804         s->m.current_picture.mb_var_sum= coef_sum;
3805         s->m.current_picture.mc_mb_var_sum= 0;
3806     }else{
3807         s->m.current_picture.mc_mb_var_sum= coef_sum;
3808         s->m.current_picture.mb_var_sum= 0;
3809     }
3810
3811     pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3812     if (pict->quality < 0)
3813         return INT_MIN;
3814     s->lambda= pict->quality * 3/2;
3815     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3816     s->qlog+= delta_qlog;
3817     return delta_qlog;
3818 }
3819
3820 static void calculate_visual_weight(SnowContext *s, Plane *p){
3821     int width = p->width;
3822     int height= p->height;
3823     int level, orientation, x, y;
3824
3825     for(level=0; level<s->spatial_decomposition_count; level++){
3826         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3827             SubBand *b= &p->band[level][orientation];
3828             IDWTELEM *ibuf= b->ibuf;
3829             int64_t error=0;
3830
3831             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3832             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3833             ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3834             for(y=0; y<height; y++){
3835                 for(x=0; x<width; x++){
3836                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3837                     error += d*d;
3838                 }
3839             }
3840
3841             b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3842         }
3843     }
3844 }
3845
3846 #define QUANTIZE2 0
3847
3848 #if QUANTIZE2==1
3849 #define Q2_STEP 8
3850
3851 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3852     SubBand *b= &p->band[level][orientation];
3853     int x, y;
3854     int xo=0;
3855     int yo=0;
3856     int step= 1 << (s->spatial_decomposition_count - level);
3857
3858     if(orientation&1)
3859         xo= step>>1;
3860     if(orientation&2)
3861         yo= step>>1;
3862
3863     //FIXME bias for nonzero ?
3864     //FIXME optimize
3865     memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3866     for(y=0; y<p->height; y++){
3867         for(x=0; x<p->width; x++){
3868             int sx= (x-xo + step/2) / step / Q2_STEP;
3869             int sy= (y-yo + step/2) / step / Q2_STEP;
3870             int v= r0[x + y*p->width] - r1[x + y*p->width];
3871             assert(sx>=0 && sy>=0 && sx < score_stride);
3872             v= ((v+8)>>4)<<4;
3873             score[sx + sy*score_stride] += v*v;
3874             assert(score[sx + sy*score_stride] >= 0);
3875         }
3876     }
3877 }
3878
3879 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3880     int level, orientation;
3881
3882     for(level=0; level<s->spatial_decomposition_count; level++){
3883         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3884             SubBand *b= &p->band[level][orientation];
3885             IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3886
3887             dequantize(s, b, dst, b->stride);
3888         }
3889     }
3890 }
3891
3892 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3893     int level, orientation, ys, xs, x, y, pass;
3894     IDWTELEM best_dequant[height * stride];
3895     IDWTELEM idwt2_buffer[height * stride];
3896     const int score_stride= (width + 10)/Q2_STEP;
3897     int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3898     int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3899     int threshold= (s->m.lambda * s->m.lambda) >> 6;
3900
3901     //FIXME pass the copy cleanly ?
3902
3903 //    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3904     ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3905
3906     for(level=0; level<s->spatial_decomposition_count; level++){
3907         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3908             SubBand *b= &p->band[level][orientation];
3909             IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3910              DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
3911             assert(src == b->buf); // code does not depend on this but it is true currently
3912
3913             quantize(s, b, dst, src, b->stride, s->qbias);
3914         }
3915     }
3916     for(pass=0; pass<1; pass++){
3917         if(s->qbias == 0) //keyframe
3918             continue;
3919         for(level=0; level<s->spatial_decomposition_count; level++){
3920             for(orientation=level ? 1 : 0; orientation<4; orientation++){
3921                 SubBand *b= &p->band[level][orientation];
3922                 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3923                 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3924
3925                 for(ys= 0; ys<Q2_STEP; ys++){
3926                     for(xs= 0; xs<Q2_STEP; xs++){
3927                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3928                         dequantize_all(s, p, idwt2_buffer, width, height);
3929                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3930                         find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3931                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3932                         for(y=ys; y<b->height; y+= Q2_STEP){
3933                             for(x=xs; x<b->width; x+= Q2_STEP){
3934                                 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3935                                 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3936                                 //FIXME try more than just --
3937                             }
3938                         }
3939                         dequantize_all(s, p, idwt2_buffer, width, height);
3940                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3941                         find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3942                         for(y=ys; y<b->height; y+= Q2_STEP){
3943                             for(x=xs; x<b->width; x+= Q2_STEP){
3944                                 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3945                                 if(score[score_idx] <= best_score[score_idx] + threshold){
3946                                     best_score[score_idx]= score[score_idx];
3947                                     if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3948                                     if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3949                                     //FIXME copy instead
3950                                 }
3951                             }
3952                         }
3953                     }
3954                 }
3955             }
3956         }
3957     }
3958     memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3959 }
3960
3961 #endif /* QUANTIZE2==1 */
3962
3963 static av_cold int encode_init(AVCodecContext *avctx)
3964 {
3965     SnowContext *s = avctx->priv_data;
3966     int plane_index;
3967
3968     if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3969         av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3970                "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3971         return -1;
3972     }
3973
3974     if(avctx->prediction_method == DWT_97
3975        && (avctx->flags & CODEC_FLAG_QSCALE)
3976        && avctx->global_quality == 0){
3977         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3978         return -1;
3979     }
3980
3981     s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3982
3983     s->chroma_h_shift= 1; //FIXME XXX
3984     s->chroma_v_shift= 1;
3985
3986     s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3987     s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3988
3989     for(plane_index=0; plane_index<3; plane_index++){
3990         s->plane[plane_index].diag_mc= 1;
3991         s->plane[plane_index].htaps= 6;
3992         s->plane[plane_index].hcoeff[0]=  40;
3993         s->plane[plane_index].hcoeff[1]= -10;
3994         s->plane[plane_index].hcoeff[2]=   2;
3995         s->plane[plane_index].fast_mc= 1;
3996     }
3997
3998     common_init(avctx);
3999     alloc_blocks(s);
4000
4001     s->version=0;
4002
4003     s->m.avctx   = avctx;
4004     s->m.flags   = avctx->flags;
4005     s->m.bit_rate= avctx->bit_rate;
4006
4007     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4008     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4009     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4010     s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4011     h263_encode_init(&s->m); //mv_penalty
4012
4013     s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4014
4015     if(avctx->flags&CODEC_FLAG_PASS1){
4016         if(!avctx->stats_out)
4017             avctx->stats_out = av_mallocz(256);
4018     }
4019     if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4020         if(ff_rate_control_init(&s->m) < 0)
4021             return -1;
4022     }
4023     s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4024
4025     avctx->coded_frame= &s->current_picture;
4026     switch(avctx->pix_fmt){
4027 //    case PIX_FMT_YUV444P:
4028 //    case PIX_FMT_YUV422P:
4029     case PIX_FMT_YUV420P:
4030     case PIX_FMT_GRAY8:
4031 //    case PIX_FMT_YUV411P:
4032 //    case PIX_FMT_YUV410P:
4033         s->colorspace_type= 0;
4034         break;
4035 /*    case PIX_FMT_RGB32:
4036         s->colorspace= 1;
4037         break;*/
4038     default:
4039         av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4040         return -1;
4041     }
4042 //    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4043     s->chroma_h_shift= 1;
4044     s->chroma_v_shift= 1;
4045
4046     ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4047     ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4048
4049     s->avctx->get_buffer(s->avctx, &s->input_picture);
4050
4051     if(s->avctx->me_method == ME_ITER){
4052         int i;
4053         int size= s->b_width * s->b_height << 2*s->block_max_depth;
4054         for(i=0; i<s->max_ref_frames; i++){
4055             s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4056             s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4057         }
4058     }
4059
4060     return 0;
4061 }
4062
4063 #define USE_HALFPEL_PLANE 0
4064
4065 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4066     int p,x,y;
4067
4068     assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4069
4070     for(p=0; p<3; p++){
4071         int is_chroma= !!p;
4072         int w= s->avctx->width  >>is_chroma;
4073         int h= s->avctx->height >>is_chroma;
4074         int ls= frame->linesize[p];
4075         uint8_t *src= frame->data[p];
4076
4077         halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4078         halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4079         halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4080
4081         halfpel[0][p]= src;
4082         for(y=0; y<h; y++){
4083             for(x=0; x<w; x++){
4084                 int i= y*ls + x;
4085
4086                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4087             }
4088         }
4089         for(y=0; y<h; y++){
4090             for(x=0; x<w; x++){
4091                 int i= y*ls + x;
4092
4093                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4094             }
4095         }
4096         src= halfpel[1][p];
4097         for(y=0; y<h; y++){
4098             for(x=0; x<w; x++){
4099                 int i= y*ls + x;
4100
4101                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4102             }
4103         }
4104
4105 //FIXME border!
4106     }
4107 }
4108
4109 static int frame_start(SnowContext *s){
4110    AVFrame tmp;
4111    int w= s->avctx->width; //FIXME round up to x16 ?
4112    int h= s->avctx->height;
4113
4114     if(s->current_picture.data[0]){
4115         s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
4116         s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4117         s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4118     }
4119
4120     tmp= s->last_picture[s->max_ref_frames-1];
4121     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4122     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4123     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4124         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4125     s->last_picture[0]= s->current_picture;
4126     s->current_picture= tmp;
4127
4128     if(s->keyframe){
4129         s->ref_frames= 0;
4130     }else{
4131         int i;
4132         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4133             if(i && s->last_picture[i-1].key_frame)
4134                 break;
4135         s->ref_frames= i;
4136     }
4137
4138     s->current_picture.reference= 1;
4139     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4140         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4141         return -1;
4142     }
4143
4144     s->current_picture.key_frame= s->keyframe;
4145
4146     return 0;
4147 }
4148
4149 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4150     SnowContext *s = avctx->priv_data;
4151     RangeCoder * const c= &s->c;
4152     AVFrame *pict = data;
4153     const int width= s->avctx->width;
4154     const int height= s->avctx->height;
4155     int level, orientation, plane_index, i, y;
4156     uint8_t rc_header_bak[sizeof(s->header_state)];
4157     uint8_t rc_block_bak[sizeof(s->block_state)];
4158
4159     ff_init_range_encoder(c, buf, buf_size);
4160     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4161
4162     for(i=0; i<3; i++){
4163         int shift= !!i;
4164         for(y=0; y<(height>>shift); y++)
4165             memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4166                    &pict->data[i][y * pict->linesize[i]],
4167                    width>>shift);
4168     }
4169     s->new_picture = *pict;
4170
4171     s->m.picture_number= avctx->frame_number;
4172     if(avctx->flags&CODEC_FLAG_PASS2){
4173         s->m.pict_type =
4174         pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4175         s->keyframe= pict->pict_type==FF_I_TYPE;
4176         if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4177             pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4178             if (pict->quality < 0)
4179                 return -1;
4180         }
4181     }else{
4182         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4183         s->m.pict_type=
4184         pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4185     }
4186
4187     if(s->pass1_rc && avctx->frame_number == 0)
4188         pict->quality= 2*FF_QP2LAMBDA;
4189     if(pict->quality){
4190         s->qlog= qscale2qlog(pict->quality);
4191         s->lambda = pict->quality * 3/2;
4192     }
4193     if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4194         s->qlog= LOSSLESS_QLOG;
4195         s->lambda = 0;
4196     }//else keep previous frame's qlog until after motion estimation
4197
4198     frame_start(s);
4199
4200     s->m.current_picture_ptr= &s->m.current_picture;
4201     if(pict->pict_type == FF_P_TYPE){
4202         int block_width = (width +15)>>4;
4203         int block_height= (height+15)>>4;
4204         int stride= s->current_picture.linesize[0];
4205
4206         assert(s->current_picture.data[0]);
4207         assert(s->last_picture[0].data[0]);
4208
4209         s->m.avctx= s->avctx;
4210         s->m.current_picture.data[0]= s->current_picture.data[0];
4211         s->m.   last_picture.data[0]= s->last_picture[0].data[0];
4212         s->m.    new_picture.data[0]= s->  input_picture.data[0];
4213         s->m.   last_picture_ptr= &s->m.   last_picture;
4214         s->m.linesize=
4215         s->m.   last_picture.linesize[0]=
4216         s->m.    new_picture.linesize[0]=
4217         s->m.current_picture.linesize[0]= stride;
4218         s->m.uvlinesize= s->current_picture.linesize[1];
4219         s->m.width = width;
4220         s->m.height= height;
4221         s->m.mb_width = block_width;
4222         s->m.mb_height= block_height;
4223         s->m.mb_stride=   s->m.mb_width+1;
4224         s->m.b8_stride= 2*s->m.mb_width+1;
4225         s->m.f_code=1;
4226         s->m.pict_type= pict->pict_type;
4227         s->m.me_method= s->avctx->me_method;
4228         s->m.me.scene_change_score=0;
4229         s->m.flags= s->avctx->flags;
4230         s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4231         s->m.out_format= FMT_H263;
4232         s->m.unrestricted_mv= 1;
4233
4234         s->m.lambda = s->lambda;
4235         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4236         s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4237
4238         s->m.dsp= s->dsp; //move
4239         ff_init_me(&s->m);
4240         s->dsp= s->m.dsp;
4241     }
4242
4243     if(s->pass1_rc){
4244         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4245         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4246     }
4247
4248 redo_frame:
4249
4250     if(pict->pict_type == FF_I_TYPE)
4251         s->spatial_decomposition_count= 5;
4252     else
4253         s->spatial_decomposition_count= 5;
4254
4255     s->m.pict_type = pict->pict_type;
4256     s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4257
4258     common_init_after_header(avctx);
4259
4260     if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4261         for(plane_index=0; plane_index<3; plane_index++){
4262             calculate_visual_weight(s, &s->plane[plane_index]);
4263         }
4264     }
4265
4266     encode_header(s);
4267     s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4268     encode_blocks(s, 1);
4269     s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4270
4271     for(plane_index=0; plane_index<3; plane_index++){
4272         Plane *p= &s->plane[plane_index];
4273         int w= p->width;
4274         int h= p->height;
4275         int x, y;
4276 //        int bits= put_bits_count(&s->c.pb);
4277
4278         if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4279             //FIXME optimize
4280             if(pict->data[plane_index]) //FIXME gray hack
4281                 for(y=0; y<h; y++){
4282                     for(x=0; x<w; x++){
4283                         s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4284                     }
4285                 }
4286             predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4287
4288             if(   plane_index==0
4289                && pict->pict_type == FF_P_TYPE
4290                && !(avctx->flags&CODEC_FLAG_PASS2)
4291                && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4292                 ff_init_range_encoder(c, buf, buf_size);
4293                 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4294                 pict->pict_type= FF_I_TYPE;
4295                 s->keyframe=1;
4296                 s->current_picture.key_frame=1;
4297                 goto redo_frame;
4298             }
4299
4300             if(s->qlog == LOSSLESS_QLOG){
4301                 for(y=0; y<h; y++){
4302                     for(x=0; x<w; x++){
4303                         s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4304                     }
4305                 }
4306             }else{
4307                 for(y=0; y<h; y++){
4308                     for(x=0; x<w; x++){
4309                         s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4310                     }
4311                 }
4312             }
4313
4314             /*  if(QUANTIZE2)
4315                 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4316             else*/
4317                 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4318
4319             if(s->pass1_rc && plane_index==0){
4320                 int delta_qlog = ratecontrol_1pass(s, pict);
4321                 if (delta_qlog <= INT_MIN)
4322                     return -1;
4323                 if(delta_qlog){
4324                     //reordering qlog in the bitstream would eliminate this reset
4325                     ff_init_range_encoder(c, buf, buf_size);
4326                     memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4327                     memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4328                     encode_header(s);
4329                     encode_blocks(s, 0);
4330                 }
4331             }
4332
4333             for(level=0; level<s->spatial_decomposition_count; level++){
4334                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4335                     SubBand *b= &p->band[level][orientation];
4336
4337                     if(!QUANTIZE2)
4338                         quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4339                     if(orientation==0)
4340                         decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4341                     encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4342                     assert(b->parent==NULL || b->parent->stride == b->stride*2);
4343                     if(orientation==0)
4344                         correlate(s, b, b->ibuf, b->stride, 1, 0);
4345                 }
4346             }
4347
4348             for(level=0; level<s->spatial_decomposition_count; level++){
4349                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4350                     SubBand *b= &p->band[level][orientation];
4351
4352                     dequantize(s, b, b->ibuf, b->stride);
4353                 }
4354             }
4355
4356             ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4357             if(s->qlog == LOSSLESS_QLOG){
4358                 for(y=0; y<h; y++){
4359                     for(x=0; x<w; x++){
4360                         s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4361                     }
4362                 }
4363             }
4364             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4365         }else{
4366             //ME/MC only
4367             if(pict->pict_type == FF_I_TYPE){
4368                 for(y=0; y<h; y++){
4369                     for(x=0; x<w; x++){
4370                         s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4371                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
4372                     }
4373                 }
4374             }else{
4375                 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4376                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4377             }
4378         }
4379         if(s->avctx->flags&CODEC_FLAG_PSNR){
4380             int64_t error= 0;
4381
4382             if(pict->data[plane_index]) //FIXME gray hack
4383                 for(y=0; y<h; y++){
4384                     for(x=0; x<w; x++){
4385                         int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4386                         error += d*d;
4387                     }
4388                 }
4389             s->avctx->error[plane_index] += error;
4390             s->current_picture.error[plane_index] = error;
4391         }
4392
4393     }
4394
4395     update_last_header_values(s);
4396
4397     if(s->last_picture[s->max_ref_frames-1].data[0]){
4398         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4399         for(i=0; i<9; i++)
4400             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4401                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4402     }
4403
4404     s->current_picture.coded_picture_number = avctx->frame_number;
4405     s->current_picture.pict_type = pict->pict_type;
4406     s->current_picture.quality = pict->quality;
4407     s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4408     s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4409     s->m.current_picture.display_picture_number =
4410     s->m.current_picture.coded_picture_number = avctx->frame_number;
4411     s->m.current_picture.quality = pict->quality;
4412     s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4413     if(s->pass1_rc)
4414         if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4415             return -1;
4416     if(avctx->flags&CODEC_FLAG_PASS1)
4417         ff_write_pass1_stats(&s->m);
4418     s->m.last_pict_type = s->m.pict_type;
4419     avctx->frame_bits = s->m.frame_bits;
4420     avctx->mv_bits = s->m.mv_bits;
4421     avctx->misc_bits = s->m.misc_bits;
4422     avctx->p_tex_bits = s->m.p_tex_bits;
4423
4424     emms_c();
4425
4426     return ff_rac_terminate(c);
4427 }
4428
4429 static av_cold void common_end(SnowContext *s){
4430     int plane_index, level, orientation, i;
4431
4432     av_freep(&s->spatial_dwt_buffer);
4433     av_freep(&s->spatial_idwt_buffer);
4434
4435     av_freep(&s->m.me.scratchpad);
4436     av_freep(&s->m.me.map);
4437     av_freep(&s->m.me.score_map);
4438     av_freep(&s->m.obmc_scratchpad);
4439
4440     av_freep(&s->block);
4441
4442     for(i=0; i<MAX_REF_FRAMES; i++){
4443         av_freep(&s->ref_mvs[i]);
4444         av_freep(&s->ref_scores[i]);
4445         if(s->last_picture[i].data[0])
4446             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4447     }
4448
4449     for(plane_index=0; plane_index<3; plane_index++){
4450         for(level=s->spatial_decomposition_count-1; level>=0; level--){
4451             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4452                 SubBand *b= &s->plane[plane_index].band[level][orientation];
4453
4454                 av_freep(&b->x_coeff);
4455             }
4456         }
4457     }
4458 }
4459
4460 static av_cold int encode_end(AVCodecContext *avctx)
4461 {
4462     SnowContext *s = avctx->priv_data;
4463
4464     common_end(s);
4465     av_free(avctx->stats_out);
4466
4467     return 0;
4468 }
4469
4470 static av_cold int decode_init(AVCodecContext *avctx)
4471 {
4472     avctx->pix_fmt= PIX_FMT_YUV420P;
4473
4474     common_init(avctx);
4475
4476     return 0;
4477 }
4478
4479 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
4480     SnowContext *s = avctx->priv_data;
4481     RangeCoder * const c= &s->c;
4482     int bytes_read;
4483     AVFrame *picture = data;
4484     int level, orientation, plane_index, i;
4485
4486     ff_init_range_decoder(c, buf, buf_size);
4487     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4488
4489     s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4490     if(decode_header(s)<0)
4491         return -1;
4492     common_init_after_header(avctx);
4493
4494     // realloc slice buffer for the case that spatial_decomposition_count changed
4495     slice_buffer_destroy(&s->sb);
4496     slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4497
4498     for(plane_index=0; plane_index<3; plane_index++){
4499         Plane *p= &s->plane[plane_index];
4500         p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4501                                               && p->hcoeff[1]==-10
4502                                               && p->hcoeff[2]==2;
4503     }
4504
4505     if(!s->block) alloc_blocks(s);
4506
4507     frame_start(s);
4508     //keyframe flag duplication mess FIXME
4509     if(avctx->debug&FF_DEBUG_PICT_INFO)
4510         av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4511
4512     decode_blocks(s);
4513
4514     for(plane_index=0; plane_index<3; plane_index++){
4515         Plane *p= &s->plane[plane_index];
4516         int w= p->width;
4517         int h= p->height;
4518         int x, y;
4519         int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4520
4521         if(s->avctx->debug&2048){
4522             memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4523             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4524
4525             for(y=0; y<h; y++){
4526                 for(x=0; x<w; x++){
4527                     int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4528                     s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4529                 }
4530             }
4531         }
4532
4533         {
4534         for(level=0; level<s->spatial_decomposition_count; level++){
4535             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4536                 SubBand *b= &p->band[level][orientation];
4537                 unpack_coeffs(s, b, b->parent, orientation);
4538             }
4539         }
4540         }
4541
4542         {
4543         const int mb_h= s->b_height << s->block_max_depth;
4544         const int block_size = MB_SIZE >> s->block_max_depth;
4545         const int block_w    = plane_index ? block_size/2 : block_size;
4546         int mb_y;
4547         dwt_compose_t cs[MAX_DECOMPOSITIONS];
4548         int yd=0, yq=0;
4549         int y;
4550         int end_y;
4551
4552         ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4553         for(mb_y=0; mb_y<=mb_h; mb_y++){
4554
4555             int slice_starty = block_w*mb_y;
4556             int slice_h = block_w*(mb_y+1);
4557             if (!(s->keyframe || s->avctx->debug&512)){
4558                 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4559                 slice_h -= (block_w >> 1);
4560             }
4561
4562             for(level=0; level<s->spatial_decomposition_count; level++){
4563                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4564                     SubBand *b= &p->band[level][orientation];
4565                     int start_y;
4566                     int end_y;
4567                     int our_mb_start = mb_y;
4568                     int our_mb_end = (mb_y + 1);
4569                     const int extra= 3;
4570                     start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4571                     end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4572                     if (!(s->keyframe || s->avctx->debug&512)){
4573                         start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4574                         end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4575                     }
4576                     start_y = FFMIN(b->height, start_y);
4577                     end_y = FFMIN(b->height, end_y);
4578
4579                     if (start_y != end_y){
4580                         if (orientation == 0){
4581                             SubBand * correlate_band = &p->band[0][0];
4582                             int correlate_end_y = FFMIN(b->height, end_y + 1);
4583                             int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4584                             decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4585                             correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4586                             dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4587                         }
4588                         else
4589                             decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4590                     }
4591                 }
4592             }
4593
4594             for(; yd<slice_h; yd+=4){
4595                 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4596             }
4597
4598             if(s->qlog == LOSSLESS_QLOG){
4599                 for(; yq<slice_h && yq<h; yq++){
4600                     IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4601                     for(x=0; x<w; x++){
4602                         line[x] <<= FRAC_BITS;
4603                     }
4604                 }
4605             }
4606
4607             predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4608
4609             y = FFMIN(p->height, slice_starty);
4610             end_y = FFMIN(p->height, slice_h);
4611             while(y < end_y)
4612                 slice_buffer_release(&s->sb, y++);
4613         }
4614
4615         slice_buffer_flush(&s->sb);
4616         }
4617
4618     }
4619
4620     emms_c();
4621
4622     if(s->last_picture[s->max_ref_frames-1].data[0]){
4623         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4624         for(i=0; i<9; i++)
4625             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4626                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4627     }
4628
4629     if(!(s->avctx->debug&2048))
4630         *picture= s->current_picture;
4631     else
4632         *picture= s->mconly_picture;
4633
4634     *data_size = sizeof(AVFrame);
4635
4636     bytes_read= c->bytestream - c->bytestream_start;
4637     if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4638
4639     return bytes_read;
4640 }
4641
4642 static av_cold int decode_end(AVCodecContext *avctx)
4643 {
4644     SnowContext *s = avctx->priv_data;
4645
4646     slice_buffer_destroy(&s->sb);
4647
4648     common_end(s);
4649
4650     return 0;
4651 }
4652
4653 AVCodec snow_decoder = {
4654     "snow",
4655     CODEC_TYPE_VIDEO,
4656     CODEC_ID_SNOW,
4657     sizeof(SnowContext),
4658     decode_init,
4659     NULL,
4660     decode_end,
4661     decode_frame,
4662     0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4663     NULL,
4664     .long_name = "Snow",
4665 };
4666
4667 #ifdef CONFIG_SNOW_ENCODER
4668 AVCodec snow_encoder = {
4669     "snow",
4670     CODEC_TYPE_VIDEO,
4671     CODEC_ID_SNOW,
4672     sizeof(SnowContext),
4673     encode_init,
4674     encode_frame,
4675     encode_end,
4676     .long_name = "Snow",
4677 };
4678 #endif
4679
4680
4681 #ifdef TEST
4682 #undef malloc
4683 #undef free
4684 #undef printf
4685 #undef random
4686
4687 int main(void){
4688     int width=256;
4689     int height=256;
4690     int buffer[2][width*height];
4691     SnowContext s;
4692     int i;
4693     s.spatial_decomposition_count=6;
4694     s.spatial_decomposition_type=1;
4695
4696     printf("testing 5/3 DWT\n");
4697     for(i=0; i<width*height; i++)
4698         buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4699
4700     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4701     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4702
4703     for(i=0; i<width*height; i++)
4704         if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4705
4706     printf("testing 9/7 DWT\n");
4707     s.spatial_decomposition_type=0;
4708     for(i=0; i<width*height; i++)
4709         buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4710
4711     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4712     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4713
4714     for(i=0; i<width*height; i++)
4715         if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4716
4717 #if 0
4718     printf("testing AC coder\n");
4719     memset(s.header_state, 0, sizeof(s.header_state));
4720     ff_init_range_encoder(&s.c, buffer[0], 256*256);
4721     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4722
4723     for(i=-256; i<256; i++){
4724         put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4725     }
4726     ff_rac_terminate(&s.c);
4727
4728     memset(s.header_state, 0, sizeof(s.header_state));
4729     ff_init_range_decoder(&s.c, buffer[0], 256*256);
4730     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4731
4732     for(i=-256; i<256; i++){
4733         int j;
4734         j= get_symbol(&s.c, s.header_state, 1);
4735         if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4736     }
4737 #endif
4738     {
4739     int level, orientation, x, y;
4740     int64_t errors[8][4];
4741     int64_t g=0;
4742
4743         memset(errors, 0, sizeof(errors));
4744         s.spatial_decomposition_count=3;
4745         s.spatial_decomposition_type=0;
4746         for(level=0; level<s.spatial_decomposition_count; level++){
4747             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4748                 int w= width  >> (s.spatial_decomposition_count-level);
4749                 int h= height >> (s.spatial_decomposition_count-level);
4750                 int stride= width  << (s.spatial_decomposition_count-level);
4751                 DWTELEM *buf= buffer[0];
4752                 int64_t error=0;
4753
4754                 if(orientation&1) buf+=w;
4755                 if(orientation>1) buf+=stride>>1;
4756
4757                 memset(buffer[0], 0, sizeof(int)*width*height);
4758                 buf[w/2 + h/2*stride]= 256*256;
4759                 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4760                 for(y=0; y<height; y++){
4761                     for(x=0; x<width; x++){
4762                         int64_t d= buffer[0][x + y*width];
4763                         error += d*d;
4764                         if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4765                     }
4766                     if(FFABS(height/2-y)<9 && level==2) printf("\n");
4767                 }
4768                 error= (int)(sqrt(error)+0.5);
4769                 errors[level][orientation]= error;
4770                 if(g) g=ff_gcd(g, error);
4771                 else g= error;
4772             }
4773         }
4774         printf("static int const visual_weight[][4]={\n");
4775         for(level=0; level<s.spatial_decomposition_count; level++){
4776             printf("  {");
4777             for(orientation=0; orientation<4; orientation++){
4778                 printf("%8"PRId64",", errors[level][orientation]/g);
4779             }
4780             printf("},\n");
4781         }
4782         printf("};\n");
4783         {
4784             int level=2;
4785             int w= width  >> (s.spatial_decomposition_count-level);
4786             //int h= height >> (s.spatial_decomposition_count-level);
4787             int stride= width  << (s.spatial_decomposition_count-level);
4788             DWTELEM *buf= buffer[0];
4789             int64_t error=0;
4790
4791             buf+=w;
4792             buf+=stride>>1;
4793
4794             memset(buffer[0], 0, sizeof(int)*width*height);
4795 #if 1
4796             for(y=0; y<height; y++){
4797                 for(x=0; x<width; x++){
4798                     int tab[4]={0,2,3,1};
4799                     buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4800                 }
4801             }
4802             ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4803 #else
4804             for(y=0; y<h; y++){
4805                 for(x=0; x<w; x++){
4806                     buf[x + y*stride  ]=169;
4807                     buf[x + y*stride-w]=64;
4808                 }
4809             }
4810             ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4811 #endif
4812             for(y=0; y<height; y++){
4813                 for(x=0; x<width; x++){
4814                     int64_t d= buffer[0][x + y*width];
4815                     error += d*d;
4816                     if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4817                 }
4818                 if(FFABS(height/2-y)<9) printf("\n");
4819             }
4820         }
4821
4822     }
4823     return 0;
4824 }
4825 #endif /* TEST */