r1009: Move the dependencies to newer package names
[cinelerra_cv/mob.git] / quicktime / ffmpeg / libavcodec / snow.c
blob596d15d3e596ed7fc892d233f6e7a79c05b4d150
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
23 #include "rangecoder.h"
24 #define MID_STATE 128
26 #include "mpegvideo.h"
28 #undef NDEBUG
29 #include <assert.h>
31 #define MAX_DECOMPOSITIONS 8
32 #define MAX_PLANES 4
33 #define DWTELEM int
34 #define QSHIFT 5
35 #define QROOT (1<<QSHIFT)
36 #define LOSSLESS_QLOG -128
37 #define FRAC_BITS 8
39 static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
57 static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
75 static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
93 static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
111 static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
129 static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
147 static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
165 static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
184 #define LOG2_OBMC_MAX 6
185 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186 #if 0 //64*cubic
187 static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 //error:0.000022
222 static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239 //error:0.000033
241 #elif 1 // 64*linear
242 static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275 //error:0.000020
277 static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294 //error:0.000015
296 #else //64*cos
297 static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 //error:0.000022
332 static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349 //error:0.000022
351 #endif
353 //linear *64
354 static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
363 //error:0.000000
366 //linear *64
367 static const uint8_t obmc4[16]={
368 4,12,12, 4,
369 12,36,36,12,
370 12,36,36,12,
371 4,12,12, 4,
372 //error:0.000000
375 static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
379 typedef struct BlockNode{
380 int16_t mx;
381 int16_t my;
382 uint8_t color[3];
383 uint8_t type;
384 //#define TYPE_SPLIT 1
385 #define BLOCK_INTRA 1
386 //#define TYPE_NOCOLOR 4
387 uint8_t level; //FIXME merge into type?
388 }BlockNode;
390 #define LOG2_MB_SIZE 4
391 #define MB_SIZE (1<<LOG2_MB_SIZE)
393 typedef struct x_and_coeff{
394 int16_t x;
395 uint16_t coeff;
396 } x_and_coeff;
398 typedef struct SubBand{
399 int level;
400 int stride;
401 int width;
402 int height;
403 int qlog; ///< log(qscale)/log[2^(1/6)]
404 DWTELEM *buf;
405 int buf_x_offset;
406 int buf_y_offset;
407 int stride_line; ///< Stride measured in lines, not pixels.
408 x_and_coeff * x_coeff;
409 struct SubBand *parent;
410 uint8_t state[/*7*2*/ 7 + 512][32];
411 }SubBand;
413 typedef struct Plane{
414 int width;
415 int height;
416 SubBand band[MAX_DECOMPOSITIONS][4];
417 }Plane;
419 /** Used to minimize the amount of memory used in order to optimize cache performance. **/
420 typedef struct {
421 DWTELEM * * line; ///< For use by idwt and predict_slices.
422 DWTELEM * * data_stack; ///< Used for internal purposes.
423 int data_stack_top;
424 int line_count;
425 int line_width;
426 int data_count;
427 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428 } slice_buffer;
430 typedef struct SnowContext{
431 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
433 AVCodecContext *avctx;
434 RangeCoder c;
435 DSPContext dsp;
436 AVFrame input_picture;
437 AVFrame current_picture;
438 AVFrame last_picture;
439 AVFrame mconly_picture;
440 // uint8_t q_context[16];
441 uint8_t header_state[32];
442 uint8_t block_state[128 + 32*128];
443 int keyframe;
444 int always_reset;
445 int version;
446 int spatial_decomposition_type;
447 int temporal_decomposition_type;
448 int spatial_decomposition_count;
449 int temporal_decomposition_count;
450 DWTELEM *spatial_dwt_buffer;
451 int colorspace_type;
452 int chroma_h_shift;
453 int chroma_v_shift;
454 int spatial_scalability;
455 int qlog;
456 int lambda;
457 int lambda2;
458 int mv_scale;
459 int qbias;
460 #define QBIAS_SHIFT 3
461 int b_width;
462 int b_height;
463 int block_max_depth;
464 Plane plane[MAX_PLANES];
465 BlockNode *block;
466 slice_buffer sb;
468 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469 }SnowContext;
471 typedef struct {
472 DWTELEM *b0;
473 DWTELEM *b1;
474 DWTELEM *b2;
475 DWTELEM *b3;
476 int y;
477 } dwt_compose_t;
479 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
482 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
484 int i;
486 buf->base_buffer = base_buffer;
487 buf->line_count = line_count;
488 buf->line_width = line_width;
489 buf->data_count = max_allocated_lines;
490 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
493 for (i = 0; i < max_allocated_lines; i++)
495 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
498 buf->data_stack_top = max_allocated_lines - 1;
501 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
503 int i;
504 int offset;
505 DWTELEM * buffer;
507 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
509 assert(buf->data_stack_top >= 0);
510 // assert(!buf->line[line]);
511 if (buf->line[line])
512 return buf->line[line];
514 offset = buf->line_width * line;
515 buffer = buf->data_stack[buf->data_stack_top];
516 buf->data_stack_top--;
517 buf->line[line] = buffer;
519 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
521 return buffer;
524 static void slice_buffer_release(slice_buffer * buf, int line)
526 int i;
527 int offset;
528 DWTELEM * buffer;
530 assert(line >= 0 && line < buf->line_count);
531 assert(buf->line[line]);
533 offset = buf->line_width * line;
534 buffer = buf->line[line];
535 buf->data_stack_top++;
536 buf->data_stack[buf->data_stack_top] = buffer;
537 buf->line[line] = NULL;
539 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
542 static void slice_buffer_flush(slice_buffer * buf)
544 int i;
545 for (i = 0; i < buf->line_count; i++)
547 if (buf->line[i])
549 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
550 slice_buffer_release(buf, i);
555 static void slice_buffer_destroy(slice_buffer * buf)
557 int i;
558 slice_buffer_flush(buf);
560 for (i = buf->data_count - 1; i >= 0; i--)
562 assert(buf->data_stack[i]);
563 av_free(buf->data_stack[i]);
565 assert(buf->data_stack);
566 av_free(buf->data_stack);
567 assert(buf->line);
568 av_free(buf->line);
571 #ifdef __sgi
572 // Avoid a name clash on SGI IRIX
573 #undef qexp
574 #endif
575 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
576 static uint8_t qexp[QROOT];
578 static inline int mirror(int v, int m){
579 if (v<0) return -v;
580 else if(v>m) return 2*m-v;
581 else return v;
584 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
585 int i;
587 if(v){
588 const int a= ABS(v);
589 const int e= av_log2(a);
590 #if 1
591 const int el= FFMIN(e, 10);
592 put_rac(c, state+0, 0);
594 for(i=0; i<el; i++){
595 put_rac(c, state+1+i, 1); //1..10
597 for(; i<e; i++){
598 put_rac(c, state+1+9, 1); //1..10
600 put_rac(c, state+1+FFMIN(i,9), 0);
602 for(i=e-1; i>=el; i--){
603 put_rac(c, state+22+9, (a>>i)&1); //22..31
605 for(; i>=0; i--){
606 put_rac(c, state+22+i, (a>>i)&1); //22..31
609 if(is_signed)
610 put_rac(c, state+11 + el, v < 0); //11..21
611 #else
613 put_rac(c, state+0, 0);
614 if(e<=9){
615 for(i=0; i<e; i++){
616 put_rac(c, state+1+i, 1); //1..10
618 put_rac(c, state+1+i, 0);
620 for(i=e-1; i>=0; i--){
621 put_rac(c, state+22+i, (a>>i)&1); //22..31
624 if(is_signed)
625 put_rac(c, state+11 + e, v < 0); //11..21
626 }else{
627 for(i=0; i<e; i++){
628 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
630 put_rac(c, state+1+FFMIN(i,9), 0);
632 for(i=e-1; i>=0; i--){
633 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
636 if(is_signed)
637 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
639 #endif
640 }else{
641 put_rac(c, state+0, 1);
645 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
646 if(get_rac(c, state+0))
647 return 0;
648 else{
649 int i, e, a;
650 e= 0;
651 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
652 e++;
655 a= 1;
656 for(i=e-1; i>=0; i--){
657 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
660 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
661 return -a;
662 else
663 return a;
667 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
668 int i;
669 int r= log2>=0 ? 1<<log2 : 1;
671 assert(v>=0);
672 assert(log2>=-4);
674 while(v >= r){
675 put_rac(c, state+4+log2, 1);
676 v -= r;
677 log2++;
678 if(log2>0) r+=r;
680 put_rac(c, state+4+log2, 0);
682 for(i=log2-1; i>=0; i--){
683 put_rac(c, state+31-i, (v>>i)&1);
687 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
688 int i;
689 int r= log2>=0 ? 1<<log2 : 1;
690 int v=0;
692 assert(log2>=-4);
694 while(get_rac(c, state+4+log2)){
695 v+= r;
696 log2++;
697 if(log2>0) r+=r;
700 for(i=log2-1; i>=0; i--){
701 v+= get_rac(c, state+31-i)<<i;
704 return v;
707 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
708 const int mirror_left= !highpass;
709 const int mirror_right= (width&1) ^ highpass;
710 const int w= (width>>1) - 1 + (highpass & width);
711 int i;
713 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
714 if(mirror_left){
715 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
716 dst += dst_step;
717 src += src_step;
720 for(i=0; i<w; i++){
721 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
724 if(mirror_right){
725 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
729 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
730 const int mirror_left= !highpass;
731 const int mirror_right= (width&1) ^ highpass;
732 const int w= (width>>1) - 1 + (highpass & width);
733 int i;
735 if(mirror_left){
736 int r= 3*2*ref[0];
737 r += r>>4;
738 r += r>>8;
739 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
740 dst += dst_step;
741 src += src_step;
744 for(i=0; i<w; i++){
745 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
746 r += r>>4;
747 r += r>>8;
748 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
751 if(mirror_right){
752 int r= 3*2*ref[w*ref_step];
753 r += r>>4;
754 r += r>>8;
755 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
759 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
760 const int mirror_left= !highpass;
761 const int mirror_right= (width&1) ^ highpass;
762 const int w= (width>>1) - 1 + (highpass & width);
763 int i;
765 assert(shift == 4);
766 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
767 if(mirror_left){
768 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
769 dst += dst_step;
770 src += src_step;
773 for(i=0; i<w; i++){
774 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
777 if(mirror_right){
778 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
783 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
784 int x, i;
786 for(x=start; x<width; x+=2){
787 int64_t sum=0;
789 for(i=0; i<n; i++){
790 int x2= x + 2*i - n + 1;
791 if (x2< 0) x2= -x2;
792 else if(x2>=width) x2= 2*width-x2-2;
793 sum += coeffs[i]*(int64_t)dst[x2];
795 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
796 else dst[x] += (sum + (1<<shift)/2)>>shift;
800 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
801 int x, y, i;
802 for(y=start; y<height; y+=2){
803 for(x=0; x<width; x++){
804 int64_t sum=0;
806 for(i=0; i<n; i++){
807 int y2= y + 2*i - n + 1;
808 if (y2< 0) y2= -y2;
809 else if(y2>=height) y2= 2*height-y2-2;
810 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
812 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
813 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
818 #define SCALEX 1
819 #define LX0 0
820 #define LX1 1
822 #if 0 // more accurate 9/7
823 #define N1 2
824 #define SHIFT1 14
825 #define COEFFS1 (int[]){-25987,-25987}
826 #define N2 2
827 #define SHIFT2 19
828 #define COEFFS2 (int[]){-27777,-27777}
829 #define N3 2
830 #define SHIFT3 15
831 #define COEFFS3 (int[]){28931,28931}
832 #define N4 2
833 #define SHIFT4 15
834 #define COEFFS4 (int[]){14533,14533}
835 #elif 1 // 13/7 CRF
836 #define N1 4
837 #define SHIFT1 4
838 #define COEFFS1 (int[]){1,-9,-9,1}
839 #define N2 4
840 #define SHIFT2 4
841 #define COEFFS2 (int[]){-1,5,5,-1}
842 #define N3 0
843 #define SHIFT3 1
844 #define COEFFS3 NULL
845 #define N4 0
846 #define SHIFT4 1
847 #define COEFFS4 NULL
848 #elif 1 // 3/5
849 #define LX0 1
850 #define LX1 0
851 #define SCALEX 0.5
852 #define N1 2
853 #define SHIFT1 1
854 #define COEFFS1 (int[]){1,1}
855 #define N2 2
856 #define SHIFT2 2
857 #define COEFFS2 (int[]){-1,-1}
858 #define N3 0
859 #define SHIFT3 0
860 #define COEFFS3 NULL
861 #define N4 0
862 #define SHIFT4 0
863 #define COEFFS4 NULL
864 #elif 1 // 11/5
865 #define N1 0
866 #define SHIFT1 1
867 #define COEFFS1 NULL
868 #define N2 2
869 #define SHIFT2 2
870 #define COEFFS2 (int[]){-1,-1}
871 #define N3 2
872 #define SHIFT3 0
873 #define COEFFS3 (int[]){-1,-1}
874 #define N4 4
875 #define SHIFT4 7
876 #define COEFFS4 (int[]){-5,29,29,-5}
877 #define SCALEX 4
878 #elif 1 // 9/7 CDF
879 #define N1 2
880 #define SHIFT1 7
881 #define COEFFS1 (int[]){-203,-203}
882 #define N2 2
883 #define SHIFT2 12
884 #define COEFFS2 (int[]){-217,-217}
885 #define N3 2
886 #define SHIFT3 7
887 #define COEFFS3 (int[]){113,113}
888 #define N4 2
889 #define SHIFT4 9
890 #define COEFFS4 (int[]){227,227}
891 #define SCALEX 1
892 #elif 1 // 7/5 CDF
893 #define N1 0
894 #define SHIFT1 1
895 #define COEFFS1 NULL
896 #define N2 2
897 #define SHIFT2 2
898 #define COEFFS2 (int[]){-1,-1}
899 #define N3 2
900 #define SHIFT3 0
901 #define COEFFS3 (int[]){-1,-1}
902 #define N4 2
903 #define SHIFT4 4
904 #define COEFFS4 (int[]){3,3}
905 #elif 1 // 9/7 MN
906 #define N1 4
907 #define SHIFT1 4
908 #define COEFFS1 (int[]){1,-9,-9,1}
909 #define N2 2
910 #define SHIFT2 2
911 #define COEFFS2 (int[]){1,1}
912 #define N3 0
913 #define SHIFT3 1
914 #define COEFFS3 NULL
915 #define N4 0
916 #define SHIFT4 1
917 #define COEFFS4 NULL
918 #else // 13/7 CRF
919 #define N1 4
920 #define SHIFT1 4
921 #define COEFFS1 (int[]){1,-9,-9,1}
922 #define N2 4
923 #define SHIFT2 4
924 #define COEFFS2 (int[]){-1,5,5,-1}
925 #define N3 0
926 #define SHIFT3 1
927 #define COEFFS3 NULL
928 #define N4 0
929 #define SHIFT4 1
930 #define COEFFS4 NULL
931 #endif
932 static void horizontal_decomposeX(DWTELEM *b, int width){
933 DWTELEM temp[width];
934 const int width2= width>>1;
935 const int w2= (width+1)>>1;
936 int A1,A2,A3,A4, x;
938 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
939 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
940 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
941 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
943 for(x=0; x<width2; x++){
944 temp[x ]= b[2*x ];
945 temp[x+w2]= b[2*x + 1];
947 if(width&1)
948 temp[x ]= b[2*x ];
949 memcpy(b, temp, width*sizeof(int));
952 static void horizontal_composeX(DWTELEM *b, int width){
953 DWTELEM temp[width];
954 const int width2= width>>1;
955 int A1,A2,A3,A4, x;
956 const int w2= (width+1)>>1;
958 memcpy(temp, b, width*sizeof(int));
959 for(x=0; x<width2; x++){
960 b[2*x ]= temp[x ];
961 b[2*x + 1]= temp[x+w2];
963 if(width&1)
964 b[2*x ]= temp[x ];
966 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
967 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
968 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
969 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
972 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
973 int x, y;
975 for(y=0; y<height; y++){
976 for(x=0; x<width; x++){
977 buffer[y*stride + x] *= SCALEX;
981 for(y=0; y<height; y++){
982 horizontal_decomposeX(buffer + y*stride, width);
985 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
986 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
987 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
988 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
991 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
992 int x, y;
994 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
995 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
996 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
997 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
999 for(y=0; y<height; y++){
1000 horizontal_composeX(buffer + y*stride, width);
1003 for(y=0; y<height; y++){
1004 for(x=0; x<width; x++){
1005 buffer[y*stride + x] /= SCALEX;
1010 static void horizontal_decompose53i(DWTELEM *b, int width){
1011 DWTELEM temp[width];
1012 const int width2= width>>1;
1013 int A1,A2,A3,A4, x;
1014 const int w2= (width+1)>>1;
1016 for(x=0; x<width2; x++){
1017 temp[x ]= b[2*x ];
1018 temp[x+w2]= b[2*x + 1];
1020 if(width&1)
1021 temp[x ]= b[2*x ];
1022 #if 0
1023 A2= temp[1 ];
1024 A4= temp[0 ];
1025 A1= temp[0+width2];
1026 A1 -= (A2 + A4)>>1;
1027 A4 += (A1 + 1)>>1;
1028 b[0+width2] = A1;
1029 b[0 ] = A4;
1030 for(x=1; x+1<width2; x+=2){
1031 A3= temp[x+width2];
1032 A4= temp[x+1 ];
1033 A3 -= (A2 + A4)>>1;
1034 A2 += (A1 + A3 + 2)>>2;
1035 b[x+width2] = A3;
1036 b[x ] = A2;
1038 A1= temp[x+1+width2];
1039 A2= temp[x+2 ];
1040 A1 -= (A2 + A4)>>1;
1041 A4 += (A1 + A3 + 2)>>2;
1042 b[x+1+width2] = A1;
1043 b[x+1 ] = A4;
1045 A3= temp[width-1];
1046 A3 -= A2;
1047 A2 += (A1 + A3 + 2)>>2;
1048 b[width -1] = A3;
1049 b[width2-1] = A2;
1050 #else
1051 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1052 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1053 #endif
1056 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1057 int i;
1059 for(i=0; i<width; i++){
1060 b1[i] -= (b0[i] + b2[i])>>1;
1064 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1065 int i;
1067 for(i=0; i<width; i++){
1068 b1[i] += (b0[i] + b2[i] + 2)>>2;
1072 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1073 int y;
1074 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1075 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1077 for(y=-2; y<height; y+=2){
1078 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1079 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1081 {START_TIMER
1082 if(b1 <= b3) horizontal_decompose53i(b2, width);
1083 if(y+2 < height) horizontal_decompose53i(b3, width);
1084 STOP_TIMER("horizontal_decompose53i")}
1086 {START_TIMER
1087 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1088 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1089 STOP_TIMER("vertical_decompose53i*")}
1091 b0=b2;
1092 b1=b3;
1096 #define liftS lift
1097 #define lift5 lift
1098 #if 1
1099 #define W_AM 3
1100 #define W_AO 0
1101 #define W_AS 1
1103 #undef liftS
1104 #define W_BM 1
1105 #define W_BO 8
1106 #define W_BS 4
1108 #define W_CM 1
1109 #define W_CO 0
1110 #define W_CS 0
1112 #define W_DM 3
1113 #define W_DO 4
1114 #define W_DS 3
1115 #elif 0
1116 #define W_AM 55
1117 #define W_AO 16
1118 #define W_AS 5
1120 #define W_BM 3
1121 #define W_BO 32
1122 #define W_BS 6
1124 #define W_CM 127
1125 #define W_CO 64
1126 #define W_CS 7
1128 #define W_DM 7
1129 #define W_DO 8
1130 #define W_DS 4
1131 #elif 0
1132 #define W_AM 97
1133 #define W_AO 32
1134 #define W_AS 6
1136 #define W_BM 63
1137 #define W_BO 512
1138 #define W_BS 10
1140 #define W_CM 13
1141 #define W_CO 8
1142 #define W_CS 4
1144 #define W_DM 15
1145 #define W_DO 16
1146 #define W_DS 5
1148 #else
1150 #define W_AM 203
1151 #define W_AO 64
1152 #define W_AS 7
1154 #define W_BM 217
1155 #define W_BO 2048
1156 #define W_BS 12
1158 #define W_CM 113
1159 #define W_CO 64
1160 #define W_CS 7
1162 #define W_DM 227
1163 #define W_DO 128
1164 #define W_DS 9
1165 #endif
1166 static void horizontal_decompose97i(DWTELEM *b, int width){
1167 DWTELEM temp[width];
1168 const int w2= (width+1)>>1;
1170 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1171 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1172 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1173 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1177 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1178 int i;
1180 for(i=0; i<width; i++){
1181 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1185 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1186 int i;
1188 for(i=0; i<width; i++){
1189 #ifdef lift5
1190 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1191 #else
1192 int r= 3*(b0[i] + b2[i]);
1193 r+= r>>4;
1194 r+= r>>8;
1195 b1[i] += (r+W_CO)>>W_CS;
1196 #endif
1200 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1201 int i;
1203 for(i=0; i<width; i++){
1204 #ifdef liftS
1205 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1206 #else
1207 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1208 #endif
1212 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1213 int i;
1215 for(i=0; i<width; i++){
1216 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1220 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1221 int y;
1222 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1223 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1224 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1225 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1227 for(y=-4; y<height; y+=2){
1228 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1229 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1231 {START_TIMER
1232 if(b3 <= b5) horizontal_decompose97i(b4, width);
1233 if(y+4 < height) horizontal_decompose97i(b5, width);
1234 if(width>400){
1235 STOP_TIMER("horizontal_decompose97i")
1238 {START_TIMER
1239 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1240 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1241 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1242 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1244 if(width>400){
1245 STOP_TIMER("vertical_decompose97i")
1248 b0=b2;
1249 b1=b3;
1250 b2=b4;
1251 b3=b5;
1255 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1256 int level;
1258 for(level=0; level<decomposition_count; level++){
1259 switch(type){
1260 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1261 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1262 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1267 static void horizontal_compose53i(DWTELEM *b, int width){
1268 DWTELEM temp[width];
1269 const int width2= width>>1;
1270 const int w2= (width+1)>>1;
1271 int A1,A2,A3,A4, x;
1273 #if 0
1274 A2= temp[1 ];
1275 A4= temp[0 ];
1276 A1= temp[0+width2];
1277 A1 -= (A2 + A4)>>1;
1278 A4 += (A1 + 1)>>1;
1279 b[0+width2] = A1;
1280 b[0 ] = A4;
1281 for(x=1; x+1<width2; x+=2){
1282 A3= temp[x+width2];
1283 A4= temp[x+1 ];
1284 A3 -= (A2 + A4)>>1;
1285 A2 += (A1 + A3 + 2)>>2;
1286 b[x+width2] = A3;
1287 b[x ] = A2;
1289 A1= temp[x+1+width2];
1290 A2= temp[x+2 ];
1291 A1 -= (A2 + A4)>>1;
1292 A4 += (A1 + A3 + 2)>>2;
1293 b[x+1+width2] = A1;
1294 b[x+1 ] = A4;
1296 A3= temp[width-1];
1297 A3 -= A2;
1298 A2 += (A1 + A3 + 2)>>2;
1299 b[width -1] = A3;
1300 b[width2-1] = A2;
1301 #else
1302 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1303 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1304 #endif
1305 for(x=0; x<width2; x++){
1306 b[2*x ]= temp[x ];
1307 b[2*x + 1]= temp[x+w2];
1309 if(width&1)
1310 b[2*x ]= temp[x ];
1313 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1314 int i;
1316 for(i=0; i<width; i++){
1317 b1[i] += (b0[i] + b2[i])>>1;
1321 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1322 int i;
1324 for(i=0; i<width; i++){
1325 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1329 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1330 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1331 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1332 cs->y = -1;
1335 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1336 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1337 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1338 cs->y = -1;
1341 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1342 int y= cs->y;
1343 int mirror0 = mirror(y-1, height-1);
1344 int mirror1 = mirror(y , height-1);
1345 int mirror2 = mirror(y+1, height-1);
1346 int mirror3 = mirror(y+2, height-1);
1348 DWTELEM *b0= cs->b0;
1349 DWTELEM *b1= cs->b1;
1350 DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1351 DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1353 {START_TIMER
1354 if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1355 if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1356 STOP_TIMER("vertical_compose53i*")}
1358 {START_TIMER
1359 if(y-1 >= 0) horizontal_compose53i(b0, width);
1360 if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1361 STOP_TIMER("horizontal_compose53i")}
1363 cs->b0 = b2;
1364 cs->b1 = b3;
1365 cs->y += 2;
1368 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1369 int y= cs->y;
1370 DWTELEM *b0= cs->b0;
1371 DWTELEM *b1= cs->b1;
1372 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1373 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1375 {START_TIMER
1376 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1377 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1378 STOP_TIMER("vertical_compose53i*")}
1380 {START_TIMER
1381 if(y-1 >= 0) horizontal_compose53i(b0, width);
1382 if(b0 <= b2) horizontal_compose53i(b1, width);
1383 STOP_TIMER("horizontal_compose53i")}
1385 cs->b0 = b2;
1386 cs->b1 = b3;
1387 cs->y += 2;
1390 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1391 dwt_compose_t cs;
1392 spatial_compose53i_init(&cs, buffer, height, stride);
1393 while(cs.y <= height)
1394 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1398 static void horizontal_compose97i(DWTELEM *b, int width){
1399 DWTELEM temp[width];
1400 const int w2= (width+1)>>1;
1402 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1403 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1404 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1405 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1408 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1409 int i;
1411 for(i=0; i<width; i++){
1412 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1416 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1417 int i;
1419 for(i=0; i<width; i++){
1420 #ifdef lift5
1421 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1422 #else
1423 int r= 3*(b0[i] + b2[i]);
1424 r+= r>>4;
1425 r+= r>>8;
1426 b1[i] -= (r+W_CO)>>W_CS;
1427 #endif
1431 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1432 int i;
1434 for(i=0; i<width; i++){
1435 #ifdef liftS
1436 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1437 #else
1438 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1439 #endif
1443 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1444 int i;
1446 for(i=0; i<width; i++){
1447 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1451 static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1452 int i;
1454 for(i=0; i<width; i++){
1455 int r;
1456 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1457 #ifdef lift5
1458 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1459 #else
1460 r= 3*(b2[i] + b4[i]);
1461 r+= r>>4;
1462 r+= r>>8;
1463 b3[i] -= (r+W_CO)>>W_CS;
1464 #endif
1465 #ifdef liftS
1466 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1467 #else
1468 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1469 #endif
1470 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1474 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1475 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1476 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1477 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1478 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1479 cs->y = -3;
1482 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1483 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1484 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1485 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1486 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1487 cs->y = -3;
1490 static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1491 int y = cs->y;
1493 int mirror0 = mirror(y - 1, height - 1);
1494 int mirror1 = mirror(y + 0, height - 1);
1495 int mirror2 = mirror(y + 1, height - 1);
1496 int mirror3 = mirror(y + 2, height - 1);
1497 int mirror4 = mirror(y + 3, height - 1);
1498 int mirror5 = mirror(y + 4, height - 1);
1499 DWTELEM *b0= cs->b0;
1500 DWTELEM *b1= cs->b1;
1501 DWTELEM *b2= cs->b2;
1502 DWTELEM *b3= cs->b3;
1503 DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1504 DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1506 {START_TIMER
1507 if(y>0 && y+4<height){
1508 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1509 }else{
1510 if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1511 if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1512 if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1513 if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1515 if(width>400){
1516 STOP_TIMER("vertical_compose97i")}}
1518 {START_TIMER
1519 if(y-1>= 0) horizontal_compose97i(b0, width);
1520 if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1521 if(width>400 && mirror0 <= mirror2){
1522 STOP_TIMER("horizontal_compose97i")}}
1524 cs->b0=b2;
1525 cs->b1=b3;
1526 cs->b2=b4;
1527 cs->b3=b5;
1528 cs->y += 2;
1531 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1532 int y = cs->y;
1533 DWTELEM *b0= cs->b0;
1534 DWTELEM *b1= cs->b1;
1535 DWTELEM *b2= cs->b2;
1536 DWTELEM *b3= cs->b3;
1537 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1538 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1540 if(stride == width && y+4 < height && 0){
1541 int x;
1542 for(x=0; x<width/2; x++)
1543 b5[x] += 64*2;
1544 for(; x<width; x++)
1545 b5[x] += 169*2;
1548 {START_TIMER
1549 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1550 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1551 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1552 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1553 if(width>400){
1554 STOP_TIMER("vertical_compose97i")}}
1556 {START_TIMER
1557 if(y-1>= 0) horizontal_compose97i(b0, width);
1558 if(b0 <= b2) horizontal_compose97i(b1, width);
1559 if(width>400 && b0 <= b2){
1560 STOP_TIMER("horizontal_compose97i")}}
1562 cs->b0=b2;
1563 cs->b1=b3;
1564 cs->b2=b4;
1565 cs->b3=b5;
1566 cs->y += 2;
1569 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1570 dwt_compose_t cs;
1571 spatial_compose97i_init(&cs, buffer, height, stride);
1572 while(cs.y <= height)
1573 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1576 void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1577 int level;
1578 for(level=decomposition_count-1; level>=0; level--){
1579 switch(type){
1580 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1581 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1582 /* not slicified yet */
1583 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1584 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1589 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1590 int level;
1591 for(level=decomposition_count-1; level>=0; level--){
1592 switch(type){
1593 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1594 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1595 /* not slicified yet */
1596 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1601 void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1602 const int support = type==1 ? 3 : 5;
1603 int level;
1604 if(type==2) return;
1606 for(level=decomposition_count-1; level>=0; level--){
1607 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1608 switch(type){
1609 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1610 break;
1611 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1612 break;
1613 case 2: break;
1619 void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1620 const int support = type==1 ? 3 : 5;
1621 int level;
1622 if(type==2) return;
1624 for(level=decomposition_count-1; level>=0; level--){
1625 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1626 switch(type){
1627 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1628 break;
1629 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1630 break;
1631 case 2: break;
1637 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1638 if(type==2){
1639 int level;
1640 for(level=decomposition_count-1; level>=0; level--)
1641 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1642 }else{
1643 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1644 int y;
1645 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1646 for(y=0; y<height; y+=4)
1647 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1651 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1652 const int w= b->width;
1653 const int h= b->height;
1654 int x, y;
1656 if(1){
1657 int run=0;
1658 int runs[w*h];
1659 int run_index=0;
1660 int max_index;
1662 for(y=0; y<h; y++){
1663 for(x=0; x<w; x++){
1664 int v, p=0;
1665 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1666 v= src[x + y*stride];
1668 if(y){
1669 t= src[x + (y-1)*stride];
1670 if(x){
1671 lt= src[x - 1 + (y-1)*stride];
1673 if(x + 1 < w){
1674 rt= src[x + 1 + (y-1)*stride];
1677 if(x){
1678 l= src[x - 1 + y*stride];
1679 /*if(x > 1){
1680 if(orientation==1) ll= src[y + (x-2)*stride];
1681 else ll= src[x - 2 + y*stride];
1684 if(parent){
1685 int px= x>>1;
1686 int py= y>>1;
1687 if(px<b->parent->width && py<b->parent->height)
1688 p= parent[px + py*2*stride];
1690 if(!(/*ll|*/l|lt|t|rt|p)){
1691 if(v){
1692 runs[run_index++]= run;
1693 run=0;
1694 }else{
1695 run++;
1700 max_index= run_index;
1701 runs[run_index++]= run;
1702 run_index=0;
1703 run= runs[run_index++];
1705 put_symbol2(&s->c, b->state[30], max_index, 0);
1706 if(run_index <= max_index)
1707 put_symbol2(&s->c, b->state[1], run, 3);
1709 for(y=0; y<h; y++){
1710 if(s->c.bytestream_end - s->c.bytestream < w*40){
1711 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1712 return -1;
1714 for(x=0; x<w; x++){
1715 int v, p=0;
1716 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1717 v= src[x + y*stride];
1719 if(y){
1720 t= src[x + (y-1)*stride];
1721 if(x){
1722 lt= src[x - 1 + (y-1)*stride];
1724 if(x + 1 < w){
1725 rt= src[x + 1 + (y-1)*stride];
1728 if(x){
1729 l= src[x - 1 + y*stride];
1730 /*if(x > 1){
1731 if(orientation==1) ll= src[y + (x-2)*stride];
1732 else ll= src[x - 2 + y*stride];
1735 if(parent){
1736 int px= x>>1;
1737 int py= y>>1;
1738 if(px<b->parent->width && py<b->parent->height)
1739 p= parent[px + py*2*stride];
1741 if(/*ll|*/l|lt|t|rt|p){
1742 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1744 put_rac(&s->c, &b->state[0][context], !!v);
1745 }else{
1746 if(!run){
1747 run= runs[run_index++];
1749 if(run_index <= max_index)
1750 put_symbol2(&s->c, b->state[1], run, 3);
1751 assert(v);
1752 }else{
1753 run--;
1754 assert(!v);
1757 if(v){
1758 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1759 int l2= 2*ABS(l) + (l<0);
1760 int t2= 2*ABS(t) + (t<0);
1762 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1763 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1768 return 0;
1771 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1772 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1773 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1774 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1775 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1778 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1779 const int w= b->width;
1780 const int h= b->height;
1781 int x,y;
1783 if(1){
1784 int run, runs;
1785 x_and_coeff *xc= b->x_coeff;
1786 x_and_coeff *prev_xc= NULL;
1787 x_and_coeff *prev2_xc= xc;
1788 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1789 x_and_coeff *prev_parent_xc= parent_xc;
1791 runs= get_symbol2(&s->c, b->state[30], 0);
1792 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1793 else run= INT_MAX;
1795 for(y=0; y<h; y++){
1796 int v=0;
1797 int lt=0, t=0, rt=0;
1799 if(y && prev_xc->x == 0){
1800 rt= prev_xc->coeff;
1802 for(x=0; x<w; x++){
1803 int p=0;
1804 const int l= v;
1806 lt= t; t= rt;
1808 if(y){
1809 if(prev_xc->x <= x)
1810 prev_xc++;
1811 if(prev_xc->x == x + 1)
1812 rt= prev_xc->coeff;
1813 else
1814 rt=0;
1816 if(parent_xc){
1817 if(x>>1 > parent_xc->x){
1818 parent_xc++;
1820 if(x>>1 == parent_xc->x){
1821 p= parent_xc->coeff;
1824 if(/*ll|*/l|lt|t|rt|p){
1825 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1827 v=get_rac(&s->c, &b->state[0][context]);
1828 if(v){
1829 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1830 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1832 xc->x=x;
1833 (xc++)->coeff= v;
1835 }else{
1836 if(!run){
1837 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1838 else run= INT_MAX;
1839 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1840 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1842 xc->x=x;
1843 (xc++)->coeff= v;
1844 }else{
1845 int max_run;
1846 run--;
1847 v=0;
1849 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1850 else max_run= FFMIN(run, w-x-1);
1851 if(parent_xc)
1852 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1853 x+= max_run;
1854 run-= max_run;
1858 (xc++)->x= w+1; //end marker
1859 prev_xc= prev2_xc;
1860 prev2_xc= xc;
1862 if(parent_xc){
1863 if(y&1){
1864 while(parent_xc->x != parent->width+1)
1865 parent_xc++;
1866 parent_xc++;
1867 prev_parent_xc= parent_xc;
1868 }else{
1869 parent_xc= prev_parent_xc;
1874 (xc++)->x= w+1; //end marker
1878 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1879 const int w= b->width;
1880 int x,y;
1881 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1882 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1883 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1884 int new_index = 0;
1886 START_TIMER
1888 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1889 qadd= 0;
1890 qmul= 1<<QEXPSHIFT;
1893 /* If we are on the second or later slice, restore our index. */
1894 if (start_y != 0)
1895 new_index = save_state[0];
1898 for(y=start_y; y<h; y++){
1899 int x = 0;
1900 int v;
1901 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1902 memset(line, 0, b->width*sizeof(DWTELEM));
1903 v = b->x_coeff[new_index].coeff;
1904 x = b->x_coeff[new_index++].x;
1905 while(x < w)
1907 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1908 register int u= -(v&1);
1909 line[x] = (t^u) - u;
1911 v = b->x_coeff[new_index].coeff;
1912 x = b->x_coeff[new_index++].x;
1915 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1916 STOP_TIMER("decode_subband")
1919 /* Save our variables for the next slice. */
1920 save_state[0] = new_index;
1922 return;
1925 static void reset_contexts(SnowContext *s){
1926 int plane_index, level, orientation;
1928 for(plane_index=0; plane_index<3; plane_index++){
1929 for(level=0; level<s->spatial_decomposition_count; level++){
1930 for(orientation=level ? 1:0; orientation<4; orientation++){
1931 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1935 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1936 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1939 static int alloc_blocks(SnowContext *s){
1940 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1941 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1943 s->b_width = w;
1944 s->b_height= h;
1946 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1947 return 0;
1950 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1951 uint8_t *bytestream= d->bytestream;
1952 uint8_t *bytestream_start= d->bytestream_start;
1953 *d= *s;
1954 d->bytestream= bytestream;
1955 d->bytestream_start= bytestream_start;
1958 //near copy & paste from dsputil, FIXME
1959 static int pix_sum(uint8_t * pix, int line_size, int w)
1961 int s, i, j;
1963 s = 0;
1964 for (i = 0; i < w; i++) {
1965 for (j = 0; j < w; j++) {
1966 s += pix[0];
1967 pix ++;
1969 pix += line_size - w;
1971 return s;
1974 //near copy & paste from dsputil, FIXME
1975 static int pix_norm1(uint8_t * pix, int line_size, int w)
1977 int s, i, j;
1978 uint32_t *sq = squareTbl + 256;
1980 s = 0;
1981 for (i = 0; i < w; i++) {
1982 for (j = 0; j < w; j ++) {
1983 s += sq[pix[0]];
1984 pix ++;
1986 pix += line_size - w;
1988 return s;
1991 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1992 const int w= s->b_width << s->block_max_depth;
1993 const int rem_depth= s->block_max_depth - level;
1994 const int index= (x + y*w) << rem_depth;
1995 const int block_w= 1<<rem_depth;
1996 BlockNode block;
1997 int i,j;
1999 block.color[0]= l;
2000 block.color[1]= cb;
2001 block.color[2]= cr;
2002 block.mx= mx;
2003 block.my= my;
2004 block.type= type;
2005 block.level= level;
2007 for(j=0; j<block_w; j++){
2008 for(i=0; i<block_w; i++){
2009 s->block[index + i + j*w]= block;
2014 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2015 const int offset[3]= {
2016 y*c-> stride + x,
2017 ((y*c->uvstride + x)>>1),
2018 ((y*c->uvstride + x)>>1),
2020 int i;
2021 for(i=0; i<3; i++){
2022 c->src[0][i]= src [i];
2023 c->ref[0][i]= ref [i] + offset[i];
2025 assert(!ref_index);
2028 //FIXME copy&paste
2029 #define P_LEFT P[1]
2030 #define P_TOP P[2]
2031 #define P_TOPRIGHT P[3]
2032 #define P_MEDIAN P[4]
2033 #define P_MV1 P[9]
2034 #define FLAG_QPEL 1 //must be 1
2036 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2037 uint8_t p_buffer[1024];
2038 uint8_t i_buffer[1024];
2039 uint8_t p_state[sizeof(s->block_state)];
2040 uint8_t i_state[sizeof(s->block_state)];
2041 RangeCoder pc, ic;
2042 uint8_t *pbbak= s->c.bytestream;
2043 uint8_t *pbbak_start= s->c.bytestream_start;
2044 int score, score2, iscore, i_len, p_len, block_s, sum;
2045 const int w= s->b_width << s->block_max_depth;
2046 const int h= s->b_height << s->block_max_depth;
2047 const int rem_depth= s->block_max_depth - level;
2048 const int index= (x + y*w) << rem_depth;
2049 const int block_w= 1<<(LOG2_MB_SIZE - level);
2050 static BlockNode null_block= { //FIXME add border maybe
2051 .color= {128,128,128},
2052 .mx= 0,
2053 .my= 0,
2054 .type= 0,
2055 .level= 0,
2057 int trx= (x+1)<<rem_depth;
2058 int try= (y+1)<<rem_depth;
2059 BlockNode *left = x ? &s->block[index-1] : &null_block;
2060 BlockNode *top = y ? &s->block[index-w] : &null_block;
2061 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2062 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2063 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2064 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2065 int pl = left->color[0];
2066 int pcb= left->color[1];
2067 int pcr= left->color[2];
2068 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2069 int pmy= mid_pred(left->my, top->my, tr->my);
2070 int mx=0, my=0;
2071 int l,cr,cb, i;
2072 const int stride= s->current_picture.linesize[0];
2073 const int uvstride= s->current_picture.linesize[1];
2074 const int instride= s->input_picture.linesize[0];
2075 const int uvinstride= s->input_picture.linesize[1];
2076 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
2077 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2078 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2079 uint8_t current_mb[3][stride*block_w];
2080 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2081 int P[10][2];
2082 int16_t last_mv[3][2];
2083 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2084 const int shift= 1+qpel;
2085 MotionEstContext *c= &s->m.me;
2086 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2087 int my_context= av_log2(2*ABS(left->my - top->my));
2088 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2090 assert(sizeof(s->block_state) >= 256);
2091 if(s->keyframe){
2092 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2093 return 0;
2096 //FIXME optimize
2097 for(i=0; i<block_w; i++)
2098 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
2099 for(i=0; i<block_w>>1; i++)
2100 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2101 for(i=0; i<block_w>>1; i++)
2102 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2104 // clip predictors / edge ?
2106 P_LEFT[0]= left->mx;
2107 P_LEFT[1]= left->my;
2108 P_TOP [0]= top->mx;
2109 P_TOP [1]= top->my;
2110 P_TOPRIGHT[0]= tr->mx;
2111 P_TOPRIGHT[1]= tr->my;
2113 last_mv[0][0]= s->block[index].mx;
2114 last_mv[0][1]= s->block[index].my;
2115 last_mv[1][0]= right->mx;
2116 last_mv[1][1]= right->my;
2117 last_mv[2][0]= bottom->mx;
2118 last_mv[2][1]= bottom->my;
2120 s->m.mb_stride=2;
2121 s->m.mb_x=
2122 s->m.mb_y= 0;
2123 s->m.me.skip= 0;
2125 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2127 assert(s->m.me. stride == stride);
2128 assert(s->m.me.uvstride == uvstride);
2130 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2131 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2132 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2133 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2135 c->xmin = - x*block_w - 16+2;
2136 c->ymin = - y*block_w - 16+2;
2137 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2138 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2140 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2141 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2142 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2143 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2144 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2145 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2146 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2148 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2149 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2151 if (!y) {
2152 c->pred_x= P_LEFT[0];
2153 c->pred_y= P_LEFT[1];
2154 } else {
2155 c->pred_x = P_MEDIAN[0];
2156 c->pred_y = P_MEDIAN[1];
2159 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2160 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2162 assert(mx >= c->xmin);
2163 assert(mx <= c->xmax);
2164 assert(my >= c->ymin);
2165 assert(my <= c->ymax);
2167 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2168 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2169 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2171 // subpel search
2172 pc= s->c;
2173 pc.bytestream_start=
2174 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2175 memcpy(p_state, s->block_state, sizeof(s->block_state));
2177 if(level!=s->block_max_depth)
2178 put_rac(&pc, &p_state[4 + s_context], 1);
2179 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2180 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2181 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2182 p_len= pc.bytestream - pc.bytestream_start;
2183 score += (s->lambda2*(p_len*8
2184 + (pc.outstanding_count - s->c.outstanding_count)*8
2185 + (-av_log2(pc.range) + av_log2(s->c.range))
2186 ))>>FF_LAMBDA_SHIFT;
2188 block_s= block_w*block_w;
2189 sum = pix_sum(&current_mb[0][0], stride, block_w);
2190 l= (sum + block_s/2)/block_s;
2191 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2193 block_s= block_w*block_w>>2;
2194 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2195 cb= (sum + block_s/2)/block_s;
2196 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2197 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2198 cr= (sum + block_s/2)/block_s;
2199 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2201 ic= s->c;
2202 ic.bytestream_start=
2203 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2204 memcpy(i_state, s->block_state, sizeof(s->block_state));
2205 if(level!=s->block_max_depth)
2206 put_rac(&ic, &i_state[4 + s_context], 1);
2207 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2208 put_symbol(&ic, &i_state[32], l-pl , 1);
2209 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2210 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2211 i_len= ic.bytestream - ic.bytestream_start;
2212 iscore += (s->lambda2*(i_len*8
2213 + (ic.outstanding_count - s->c.outstanding_count)*8
2214 + (-av_log2(ic.range) + av_log2(s->c.range))
2215 ))>>FF_LAMBDA_SHIFT;
2217 // assert(score==256*256*256*64-1);
2218 assert(iscore < 255*255*256 + s->lambda2*10);
2219 assert(iscore >= 0);
2220 assert(l>=0 && l<=255);
2221 assert(pl>=0 && pl<=255);
2223 if(level==0){
2224 int varc= iscore >> 8;
2225 int vard= score >> 8;
2226 if (vard <= 64 || vard < varc)
2227 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2228 else
2229 c->scene_change_score+= s->m.qscale;
2232 if(level!=s->block_max_depth){
2233 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2234 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2235 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2236 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2237 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2238 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2240 if(score2 < score && score2 < iscore)
2241 return score2;
2244 if(iscore < score){
2245 memcpy(pbbak, i_buffer, i_len);
2246 s->c= ic;
2247 s->c.bytestream_start= pbbak_start;
2248 s->c.bytestream= pbbak + i_len;
2249 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2250 memcpy(s->block_state, i_state, sizeof(s->block_state));
2251 return iscore;
2252 }else{
2253 memcpy(pbbak, p_buffer, p_len);
2254 s->c= pc;
2255 s->c.bytestream_start= pbbak_start;
2256 s->c.bytestream= pbbak + p_len;
2257 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2258 memcpy(s->block_state, p_state, sizeof(s->block_state));
2259 return score;
2263 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2264 const int w= s->b_width << s->block_max_depth;
2265 const int rem_depth= s->block_max_depth - level;
2266 const int index= (x + y*w) << rem_depth;
2267 static BlockNode null_block= { //FIXME add border maybe
2268 .color= {128,128,128},
2269 .mx= 0,
2270 .my= 0,
2271 .type= 0,
2272 .level= 0,
2274 int trx= (x+1)<<rem_depth;
2275 BlockNode *left = x ? &s->block[index-1] : &null_block;
2276 BlockNode *top = y ? &s->block[index-w] : &null_block;
2277 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2278 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2279 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2281 if(s->keyframe){
2282 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2283 return;
2286 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2287 int type;
2288 int l = left->color[0];
2289 int cb= left->color[1];
2290 int cr= left->color[2];
2291 int mx= mid_pred(left->mx, top->mx, tr->mx);
2292 int my= mid_pred(left->my, top->my, tr->my);
2293 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2294 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2296 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2298 if(type){
2299 l += get_symbol(&s->c, &s->block_state[32], 1);
2300 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2301 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2302 }else{
2303 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2304 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2306 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2307 }else{
2308 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2309 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2310 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2311 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2315 static void encode_blocks(SnowContext *s){
2316 int x, y;
2317 int w= s->b_width;
2318 int h= s->b_height;
2320 for(y=0; y<h; y++){
2321 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2322 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2323 return;
2325 for(x=0; x<w; x++){
2326 encode_q_branch(s, 0, x, y);
2331 static void decode_blocks(SnowContext *s){
2332 int x, y;
2333 int w= s->b_width;
2334 int h= s->b_height;
2336 for(y=0; y<h; y++){
2337 for(x=0; x<w; x++){
2338 decode_q_branch(s, 0, x, y);
2343 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2344 int x, y;
2345 START_TIMER
2346 for(y=0; y < b_h+5; y++){
2347 for(x=0; x < b_w; x++){
2348 int a0= src[x ];
2349 int a1= src[x + 1];
2350 int a2= src[x + 2];
2351 int a3= src[x + 3];
2352 int a4= src[x + 4];
2353 int a5= src[x + 5];
2354 // int am= 9*(a1+a2) - (a0+a3);
2355 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2356 // int am= 18*(a2+a3) - 2*(a1+a4);
2357 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2358 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2360 // if(b_w==16) am= 8*(a1+a2);
2362 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2363 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2365 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2366 if(am&(~255)) am= ~(am>>31);
2368 tmp[x] = am;
2370 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2371 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2372 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2373 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2375 tmp += stride;
2376 src += stride;
2378 tmp -= (b_h+5)*stride;
2380 for(y=0; y < b_h; y++){
2381 for(x=0; x < b_w; x++){
2382 int a0= tmp[x + 0*stride];
2383 int a1= tmp[x + 1*stride];
2384 int a2= tmp[x + 2*stride];
2385 int a3= tmp[x + 3*stride];
2386 int a4= tmp[x + 4*stride];
2387 int a5= tmp[x + 5*stride];
2388 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2389 // int am= 18*(a2+a3) - 2*(a1+a4);
2390 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2391 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2393 // if(b_w==16) am= 8*(a1+a2);
2395 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2396 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2398 if(am&(~255)) am= ~(am>>31);
2400 dst[x] = am;
2401 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2402 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2403 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2404 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2406 dst += stride;
2407 tmp += stride;
2409 STOP_TIMER("mc_block")
2412 #define mca(dx,dy,b_w)\
2413 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2414 uint8_t tmp[stride*(b_w+5)];\
2415 assert(h==b_w);\
2416 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2419 mca( 0, 0,16)
2420 mca( 8, 0,16)
2421 mca( 0, 8,16)
2422 mca( 8, 8,16)
2423 mca( 0, 0,8)
2424 mca( 8, 0,8)
2425 mca( 0, 8,8)
2426 mca( 8, 8,8)
2428 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2429 if(block->type){
2430 int x, y;
2431 const int color= block->color[plane_index];
2432 for(y=0; y < b_h; y++){
2433 for(x=0; x < b_w; x++){
2434 dst[x + y*stride]= color;
2437 }else{
2438 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2439 int mx= block->mx*scale;
2440 int my= block->my*scale;
2441 const int dx= mx&15;
2442 const int dy= my&15;
2443 sx += (mx>>4) - 2;
2444 sy += (my>>4) - 2;
2445 src += sx + sy*stride;
2446 if( (unsigned)sx >= w - b_w - 4
2447 || (unsigned)sy >= h - b_h - 4){
2448 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2449 src= tmp + MB_SIZE;
2451 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2452 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2453 else
2454 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2458 static always_inline int same_block(BlockNode *a, BlockNode *b){
2459 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2462 //FIXME name clenup (b_w, block_w, b_width stuff)
2463 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2464 DWTELEM * dst = NULL;
2465 const int b_width = s->b_width << s->block_max_depth;
2466 const int b_height= s->b_height << s->block_max_depth;
2467 const int b_stride= b_width;
2468 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2469 BlockNode *rt= lt+1;
2470 BlockNode *lb= lt+b_stride;
2471 BlockNode *rb= lb+1;
2472 uint8_t *block[4];
2473 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2474 int x,y;
2476 if(b_x<0){
2477 lt= rt;
2478 lb= rb;
2479 }else if(b_x + 1 >= b_width){
2480 rt= lt;
2481 rb= lb;
2483 if(b_y<0){
2484 lt= lb;
2485 rt= rb;
2486 }else if(b_y + 1 >= b_height){
2487 lb= lt;
2488 rb= rt;
2491 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2492 obmc -= src_x;
2493 b_w += src_x;
2494 src_x=0;
2495 }else if(src_x + b_w > w){
2496 b_w = w - src_x;
2498 if(src_y<0){
2499 obmc -= src_y*obmc_stride;
2500 b_h += src_y;
2501 src_y=0;
2502 }else if(src_y + b_h> h){
2503 b_h = h - src_y;
2506 if(b_w<=0 || b_h<=0) return;
2508 assert(src_stride > 7*MB_SIZE);
2509 // old_dst += src_x + src_y*dst_stride;
2510 dst8+= src_x + src_y*src_stride;
2511 // src += src_x + src_y*src_stride;
2513 block[0]= tmp+3*MB_SIZE;
2514 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2516 if(same_block(lt, rt)){
2517 block[1]= block[0];
2518 }else{
2519 block[1]= tmp + 4*MB_SIZE;
2520 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2523 if(same_block(lt, lb)){
2524 block[2]= block[0];
2525 }else if(same_block(rt, lb)){
2526 block[2]= block[1];
2527 }else{
2528 block[2]= tmp+5*MB_SIZE;
2529 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2532 if(same_block(lt, rb) ){
2533 block[3]= block[0];
2534 }else if(same_block(rt, rb)){
2535 block[3]= block[1];
2536 }else if(same_block(lb, rb)){
2537 block[3]= block[2];
2538 }else{
2539 block[3]= tmp+6*MB_SIZE;
2540 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2542 #if 0
2543 for(y=0; y<b_h; y++){
2544 for(x=0; x<b_w; x++){
2545 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2546 if(add) dst[x + y*dst_stride] += v;
2547 else dst[x + y*dst_stride] -= v;
2550 for(y=0; y<b_h; y++){
2551 uint8_t *obmc2= obmc + (obmc_stride>>1);
2552 for(x=0; x<b_w; x++){
2553 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2554 if(add) dst[x + y*dst_stride] += v;
2555 else dst[x + y*dst_stride] -= v;
2558 for(y=0; y<b_h; y++){
2559 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2560 for(x=0; x<b_w; x++){
2561 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2562 if(add) dst[x + y*dst_stride] += v;
2563 else dst[x + y*dst_stride] -= v;
2566 for(y=0; y<b_h; y++){
2567 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2568 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2569 for(x=0; x<b_w; x++){
2570 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2571 if(add) dst[x + y*dst_stride] += v;
2572 else dst[x + y*dst_stride] -= v;
2575 #else
2578 START_TIMER
2580 int block_index = 0;
2581 for(y=0; y<b_h; y++){
2582 //FIXME ugly missue of obmc_stride
2583 uint8_t *obmc1= obmc + y*obmc_stride;
2584 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2585 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2586 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2587 dst = slice_buffer_get_line(sb, src_y + y);
2588 for(x=0; x<b_w; x++){
2589 int v= obmc1[x] * block[3][x + y*src_stride]
2590 +obmc2[x] * block[2][x + y*src_stride]
2591 +obmc3[x] * block[1][x + y*src_stride]
2592 +obmc4[x] * block[0][x + y*src_stride];
2594 v <<= 8 - LOG2_OBMC_MAX;
2595 if(FRAC_BITS != 8){
2596 v += 1<<(7 - FRAC_BITS);
2597 v >>= 8 - FRAC_BITS;
2599 if(add){
2600 // v += old_dst[x + y*dst_stride];
2601 v += dst[x + src_x];
2602 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2603 if(v&(~255)) v= ~(v>>31);
2604 dst8[x + y*src_stride] = v;
2605 }else{
2606 // old_dst[x + y*dst_stride] -= v;
2607 dst[x + src_x] -= v;
2611 STOP_TIMER("Inner add y block")
2613 #endif
2616 //FIXME name clenup (b_w, block_w, b_width stuff)
2617 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2618 const int b_width = s->b_width << s->block_max_depth;
2619 const int b_height= s->b_height << s->block_max_depth;
2620 const int b_stride= b_width;
2621 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2622 BlockNode *rt= lt+1;
2623 BlockNode *lb= lt+b_stride;
2624 BlockNode *rb= lb+1;
2625 uint8_t *block[4];
2626 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2627 int x,y;
2629 if(b_x<0){
2630 lt= rt;
2631 lb= rb;
2632 }else if(b_x + 1 >= b_width){
2633 rt= lt;
2634 rb= lb;
2636 if(b_y<0){
2637 lt= lb;
2638 rt= rb;
2639 }else if(b_y + 1 >= b_height){
2640 lb= lt;
2641 rb= rt;
2644 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2645 obmc -= src_x;
2646 b_w += src_x;
2647 src_x=0;
2648 }else if(src_x + b_w > w){
2649 b_w = w - src_x;
2651 if(src_y<0){
2652 obmc -= src_y*obmc_stride;
2653 b_h += src_y;
2654 src_y=0;
2655 }else if(src_y + b_h> h){
2656 b_h = h - src_y;
2659 if(b_w<=0 || b_h<=0) return;
2661 assert(src_stride > 7*MB_SIZE);
2662 dst += src_x + src_y*dst_stride;
2663 dst8+= src_x + src_y*src_stride;
2664 // src += src_x + src_y*src_stride;
2666 block[0]= tmp+3*MB_SIZE;
2667 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2669 if(same_block(lt, rt)){
2670 block[1]= block[0];
2671 }else{
2672 block[1]= tmp + 4*MB_SIZE;
2673 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2676 if(same_block(lt, lb)){
2677 block[2]= block[0];
2678 }else if(same_block(rt, lb)){
2679 block[2]= block[1];
2680 }else{
2681 block[2]= tmp+5*MB_SIZE;
2682 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2685 if(same_block(lt, rb) ){
2686 block[3]= block[0];
2687 }else if(same_block(rt, rb)){
2688 block[3]= block[1];
2689 }else if(same_block(lb, rb)){
2690 block[3]= block[2];
2691 }else{
2692 block[3]= tmp+6*MB_SIZE;
2693 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2695 #if 0
2696 for(y=0; y<b_h; y++){
2697 for(x=0; x<b_w; x++){
2698 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2699 if(add) dst[x + y*dst_stride] += v;
2700 else dst[x + y*dst_stride] -= v;
2703 for(y=0; y<b_h; y++){
2704 uint8_t *obmc2= obmc + (obmc_stride>>1);
2705 for(x=0; x<b_w; x++){
2706 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2707 if(add) dst[x + y*dst_stride] += v;
2708 else dst[x + y*dst_stride] -= v;
2711 for(y=0; y<b_h; y++){
2712 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2713 for(x=0; x<b_w; x++){
2714 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2715 if(add) dst[x + y*dst_stride] += v;
2716 else dst[x + y*dst_stride] -= v;
2719 for(y=0; y<b_h; y++){
2720 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2721 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2722 for(x=0; x<b_w; x++){
2723 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2724 if(add) dst[x + y*dst_stride] += v;
2725 else dst[x + y*dst_stride] -= v;
2728 #else
2729 for(y=0; y<b_h; y++){
2730 //FIXME ugly missue of obmc_stride
2731 uint8_t *obmc1= obmc + y*obmc_stride;
2732 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2733 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2734 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2735 for(x=0; x<b_w; x++){
2736 int v= obmc1[x] * block[3][x + y*src_stride]
2737 +obmc2[x] * block[2][x + y*src_stride]
2738 +obmc3[x] * block[1][x + y*src_stride]
2739 +obmc4[x] * block[0][x + y*src_stride];
2741 v <<= 8 - LOG2_OBMC_MAX;
2742 if(FRAC_BITS != 8){
2743 v += 1<<(7 - FRAC_BITS);
2744 v >>= 8 - FRAC_BITS;
2746 if(add){
2747 v += dst[x + y*dst_stride];
2748 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2749 if(v&(~255)) v= ~(v>>31);
2750 dst8[x + y*src_stride] = v;
2751 }else{
2752 dst[x + y*dst_stride] -= v;
2756 #endif
2759 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2760 Plane *p= &s->plane[plane_index];
2761 const int mb_w= s->b_width << s->block_max_depth;
2762 const int mb_h= s->b_height << s->block_max_depth;
2763 int x, y, mb_x;
2764 int block_size = MB_SIZE >> s->block_max_depth;
2765 int block_w = plane_index ? block_size/2 : block_size;
2766 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2767 int obmc_stride= plane_index ? block_size : 2*block_size;
2768 int ref_stride= s->current_picture.linesize[plane_index];
2769 uint8_t *ref = s->last_picture.data[plane_index];
2770 uint8_t *dst8= s->current_picture.data[plane_index];
2771 int w= p->width;
2772 int h= p->height;
2773 START_TIMER
2775 if(s->keyframe || (s->avctx->debug&512)){
2776 if(mb_y==mb_h)
2777 return;
2779 if(add){
2780 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2782 // DWTELEM * line = slice_buffer_get_line(sb, y);
2783 DWTELEM * line = sb->line[y];
2784 for(x=0; x<w; x++)
2786 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2787 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2788 v >>= FRAC_BITS;
2789 if(v&(~255)) v= ~(v>>31);
2790 dst8[x + y*ref_stride]= v;
2793 }else{
2794 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2796 // DWTELEM * line = slice_buffer_get_line(sb, y);
2797 DWTELEM * line = sb->line[y];
2798 for(x=0; x<w; x++)
2800 line[x] -= 128 << FRAC_BITS;
2801 // buf[x + y*w]-= 128<<FRAC_BITS;
2806 return;
2809 for(mb_x=0; mb_x<=mb_w; mb_x++){
2810 START_TIMER
2812 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2813 block_w*mb_x - block_w/2,
2814 block_w*mb_y - block_w/2,
2815 block_w, block_w,
2816 w, h,
2817 w, ref_stride, obmc_stride,
2818 mb_x - 1, mb_y - 1,
2819 add, plane_index);
2821 STOP_TIMER("add_yblock")
2824 STOP_TIMER("predict_slice")
2827 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2828 Plane *p= &s->plane[plane_index];
2829 const int mb_w= s->b_width << s->block_max_depth;
2830 const int mb_h= s->b_height << s->block_max_depth;
2831 int x, y, mb_x;
2832 int block_size = MB_SIZE >> s->block_max_depth;
2833 int block_w = plane_index ? block_size/2 : block_size;
2834 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2835 int obmc_stride= plane_index ? block_size : 2*block_size;
2836 int ref_stride= s->current_picture.linesize[plane_index];
2837 uint8_t *ref = s->last_picture.data[plane_index];
2838 uint8_t *dst8= s->current_picture.data[plane_index];
2839 int w= p->width;
2840 int h= p->height;
2841 START_TIMER
2843 if(s->keyframe || (s->avctx->debug&512)){
2844 if(mb_y==mb_h)
2845 return;
2847 if(add){
2848 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2849 for(x=0; x<w; x++){
2850 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2851 v >>= FRAC_BITS;
2852 if(v&(~255)) v= ~(v>>31);
2853 dst8[x + y*ref_stride]= v;
2856 }else{
2857 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2858 for(x=0; x<w; x++){
2859 buf[x + y*w]-= 128<<FRAC_BITS;
2864 return;
2867 for(mb_x=0; mb_x<=mb_w; mb_x++){
2868 START_TIMER
2870 add_yblock(s, buf, dst8, ref, obmc,
2871 block_w*mb_x - block_w/2,
2872 block_w*mb_y - block_w/2,
2873 block_w, block_w,
2874 w, h,
2875 w, ref_stride, obmc_stride,
2876 mb_x - 1, mb_y - 1,
2877 add, plane_index);
2879 STOP_TIMER("add_yblock")
2882 STOP_TIMER("predict_slice")
2885 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2886 const int mb_h= s->b_height << s->block_max_depth;
2887 int mb_y;
2888 for(mb_y=0; mb_y<=mb_h; mb_y++)
2889 predict_slice(s, buf, plane_index, add, mb_y);
2892 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2893 const int level= b->level;
2894 const int w= b->width;
2895 const int h= b->height;
2896 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2897 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2898 int x,y, thres1, thres2;
2899 START_TIMER
2901 if(s->qlog == LOSSLESS_QLOG) return;
2903 bias= bias ? 0 : (3*qmul)>>3;
2904 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2905 thres2= 2*thres1;
2907 if(!bias){
2908 for(y=0; y<h; y++){
2909 for(x=0; x<w; x++){
2910 int i= src[x + y*stride];
2912 if((unsigned)(i+thres1) > thres2){
2913 if(i>=0){
2914 i<<= QEXPSHIFT;
2915 i/= qmul; //FIXME optimize
2916 src[x + y*stride]= i;
2917 }else{
2918 i= -i;
2919 i<<= QEXPSHIFT;
2920 i/= qmul; //FIXME optimize
2921 src[x + y*stride]= -i;
2923 }else
2924 src[x + y*stride]= 0;
2927 }else{
2928 for(y=0; y<h; y++){
2929 for(x=0; x<w; x++){
2930 int i= src[x + y*stride];
2932 if((unsigned)(i+thres1) > thres2){
2933 if(i>=0){
2934 i<<= QEXPSHIFT;
2935 i= (i + bias) / qmul; //FIXME optimize
2936 src[x + y*stride]= i;
2937 }else{
2938 i= -i;
2939 i<<= QEXPSHIFT;
2940 i= (i + bias) / qmul; //FIXME optimize
2941 src[x + y*stride]= -i;
2943 }else
2944 src[x + y*stride]= 0;
2948 if(level+1 == s->spatial_decomposition_count){
2949 // STOP_TIMER("quantize")
2953 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
2954 const int w= b->width;
2955 const int h= b->height;
2956 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2957 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2958 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2959 int x,y;
2960 START_TIMER
2962 if(s->qlog == LOSSLESS_QLOG) return;
2964 for(y=start_y; y<end_y; y++){
2965 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2966 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2967 for(x=0; x<w; x++){
2968 int i= line[x];
2969 if(i<0){
2970 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2971 }else if(i>0){
2972 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
2976 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2977 STOP_TIMER("dquant")
2981 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2982 const int w= b->width;
2983 const int h= b->height;
2984 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2985 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2986 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2987 int x,y;
2988 START_TIMER
2990 if(s->qlog == LOSSLESS_QLOG) return;
2992 for(y=0; y<h; y++){
2993 for(x=0; x<w; x++){
2994 int i= src[x + y*stride];
2995 if(i<0){
2996 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2997 }else if(i>0){
2998 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3002 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3003 STOP_TIMER("dquant")
3007 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3008 const int w= b->width;
3009 const int h= b->height;
3010 int x,y;
3012 for(y=h-1; y>=0; y--){
3013 for(x=w-1; x>=0; x--){
3014 int i= x + y*stride;
3016 if(x){
3017 if(use_median){
3018 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3019 else src[i] -= src[i - 1];
3020 }else{
3021 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3022 else src[i] -= src[i - 1];
3024 }else{
3025 if(y) src[i] -= src[i - stride];
3031 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3032 const int w= b->width;
3033 const int h= b->height;
3034 int x,y;
3036 // START_TIMER
3038 DWTELEM * line;
3039 DWTELEM * prev;
3041 if (start_y != 0)
3042 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3044 for(y=start_y; y<end_y; y++){
3045 prev = line;
3046 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3047 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3048 for(x=0; x<w; x++){
3049 if(x){
3050 if(use_median){
3051 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3052 else line[x] += line[x - 1];
3053 }else{
3054 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3055 else line[x] += line[x - 1];
3057 }else{
3058 if(y) line[x] += prev[x];
3063 // STOP_TIMER("correlate")
3066 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3067 const int w= b->width;
3068 const int h= b->height;
3069 int x,y;
3071 for(y=0; y<h; y++){
3072 for(x=0; x<w; x++){
3073 int i= x + y*stride;
3075 if(x){
3076 if(use_median){
3077 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3078 else src[i] += src[i - 1];
3079 }else{
3080 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3081 else src[i] += src[i - 1];
3083 }else{
3084 if(y) src[i] += src[i - stride];
3090 static void encode_header(SnowContext *s){
3091 int plane_index, level, orientation;
3092 uint8_t kstate[32];
3094 memset(kstate, MID_STATE, sizeof(kstate));
3096 put_rac(&s->c, kstate, s->keyframe);
3097 if(s->keyframe || s->always_reset)
3098 reset_contexts(s);
3099 if(s->keyframe){
3100 put_symbol(&s->c, s->header_state, s->version, 0);
3101 put_rac(&s->c, s->header_state, s->always_reset);
3102 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3103 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3104 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3105 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3106 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3107 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3108 put_rac(&s->c, s->header_state, s->spatial_scalability);
3109 // put_rac(&s->c, s->header_state, s->rate_scalability);
3111 for(plane_index=0; plane_index<2; plane_index++){
3112 for(level=0; level<s->spatial_decomposition_count; level++){
3113 for(orientation=level ? 1:0; orientation<4; orientation++){
3114 if(orientation==2) continue;
3115 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3120 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3121 put_symbol(&s->c, s->header_state, s->qlog, 1);
3122 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3123 put_symbol(&s->c, s->header_state, s->qbias, 1);
3124 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3127 static int decode_header(SnowContext *s){
3128 int plane_index, level, orientation;
3129 uint8_t kstate[32];
3131 memset(kstate, MID_STATE, sizeof(kstate));
3133 s->keyframe= get_rac(&s->c, kstate);
3134 if(s->keyframe || s->always_reset)
3135 reset_contexts(s);
3136 if(s->keyframe){
3137 s->version= get_symbol(&s->c, s->header_state, 0);
3138 if(s->version>0){
3139 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3140 return -1;
3142 s->always_reset= get_rac(&s->c, s->header_state);
3143 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3144 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3145 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3146 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3147 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3148 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3149 s->spatial_scalability= get_rac(&s->c, s->header_state);
3150 // s->rate_scalability= get_rac(&s->c, s->header_state);
3152 for(plane_index=0; plane_index<3; plane_index++){
3153 for(level=0; level<s->spatial_decomposition_count; level++){
3154 for(orientation=level ? 1:0; orientation<4; orientation++){
3155 int q;
3156 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3157 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3158 else q= get_symbol(&s->c, s->header_state, 1);
3159 s->plane[plane_index].band[level][orientation].qlog= q;
3165 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3166 if(s->spatial_decomposition_type > 2){
3167 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3168 return -1;
3171 s->qlog= get_symbol(&s->c, s->header_state, 1);
3172 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3173 s->qbias= get_symbol(&s->c, s->header_state, 1);
3174 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3176 return 0;
3179 static void init_qexp(){
3180 int i;
3181 double v=128;
3183 for(i=0; i<QROOT; i++){
3184 qexp[i]= lrintf(v);
3185 v *= pow(2, 1.0 / QROOT);
3189 static int common_init(AVCodecContext *avctx){
3190 SnowContext *s = avctx->priv_data;
3191 int width, height;
3192 int level, orientation, plane_index, dec;
3194 s->avctx= avctx;
3196 dsputil_init(&s->dsp, avctx);
3198 #define mcf(dx,dy)\
3199 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3200 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3201 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3202 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3203 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3204 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3206 mcf( 0, 0)
3207 mcf( 4, 0)
3208 mcf( 8, 0)
3209 mcf(12, 0)
3210 mcf( 0, 4)
3211 mcf( 4, 4)
3212 mcf( 8, 4)
3213 mcf(12, 4)
3214 mcf( 0, 8)
3215 mcf( 4, 8)
3216 mcf( 8, 8)
3217 mcf(12, 8)
3218 mcf( 0,12)
3219 mcf( 4,12)
3220 mcf( 8,12)
3221 mcf(12,12)
3223 #define mcfh(dx,dy)\
3224 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3225 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3226 mc_block_hpel ## dx ## dy ## 16;\
3227 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3228 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3229 mc_block_hpel ## dx ## dy ## 8;
3231 mcfh(0, 0)
3232 mcfh(8, 0)
3233 mcfh(0, 8)
3234 mcfh(8, 8)
3236 if(!qexp[0])
3237 init_qexp();
3239 dec= s->spatial_decomposition_count= 5;
3240 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3242 s->chroma_h_shift= 1; //FIXME XXX
3243 s->chroma_v_shift= 1;
3245 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3247 width= s->avctx->width;
3248 height= s->avctx->height;
3250 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3252 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3253 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3255 for(plane_index=0; plane_index<3; plane_index++){
3256 int w= s->avctx->width;
3257 int h= s->avctx->height;
3259 if(plane_index){
3260 w>>= s->chroma_h_shift;
3261 h>>= s->chroma_v_shift;
3263 s->plane[plane_index].width = w;
3264 s->plane[plane_index].height= h;
3265 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3266 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3267 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3268 SubBand *b= &s->plane[plane_index].band[level][orientation];
3270 b->buf= s->spatial_dwt_buffer;
3271 b->level= level;
3272 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3273 b->width = (w + !(orientation&1))>>1;
3274 b->height= (h + !(orientation>1))>>1;
3276 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3277 b->buf_x_offset = 0;
3278 b->buf_y_offset = 0;
3280 if(orientation&1){
3281 b->buf += (w+1)>>1;
3282 b->buf_x_offset = (w+1)>>1;
3284 if(orientation>1){
3285 b->buf += b->stride>>1;
3286 b->buf_y_offset = b->stride_line >> 1;
3289 if(level)
3290 b->parent= &s->plane[plane_index].band[level-1][orientation];
3291 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3293 w= (w+1)>>1;
3294 h= (h+1)>>1;
3298 reset_contexts(s);
3300 width= s->width= avctx->width;
3301 height= s->height= avctx->height;
3303 assert(width && height);
3305 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3307 return 0;
3311 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3312 int width = p->width;
3313 int height= p->height;
3314 int level, orientation, x, y;
3316 for(level=0; level<s->spatial_decomposition_count; level++){
3317 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3318 SubBand *b= &p->band[level][orientation];
3319 DWTELEM *buf= b->buf;
3320 int64_t error=0;
3322 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3323 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3324 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3325 for(y=0; y<height; y++){
3326 for(x=0; x<width; x++){
3327 int64_t d= s->spatial_dwt_buffer[x + y*width];
3328 error += d*d;
3332 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3333 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3338 static int encode_init(AVCodecContext *avctx)
3340 SnowContext *s = avctx->priv_data;
3341 int plane_index;
3343 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3344 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3345 "use vstrict=-2 / -strict -2 to use it anyway\n");
3346 return -1;
3349 common_init(avctx);
3350 alloc_blocks(s);
3352 s->version=0;
3354 s->m.avctx = avctx;
3355 s->m.flags = avctx->flags;
3356 s->m.bit_rate= avctx->bit_rate;
3358 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3359 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3360 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3361 h263_encode_init(&s->m); //mv_penalty
3363 if(avctx->flags&CODEC_FLAG_PASS1){
3364 if(!avctx->stats_out)
3365 avctx->stats_out = av_mallocz(256);
3367 if(avctx->flags&CODEC_FLAG_PASS2){
3368 if(ff_rate_control_init(&s->m) < 0)
3369 return -1;
3372 for(plane_index=0; plane_index<3; plane_index++){
3373 calculate_vissual_weight(s, &s->plane[plane_index]);
3377 avctx->coded_frame= &s->current_picture;
3378 switch(avctx->pix_fmt){
3379 // case PIX_FMT_YUV444P:
3380 // case PIX_FMT_YUV422P:
3381 case PIX_FMT_YUV420P:
3382 case PIX_FMT_GRAY8:
3383 // case PIX_FMT_YUV411P:
3384 // case PIX_FMT_YUV410P:
3385 s->colorspace_type= 0;
3386 break;
3387 /* case PIX_FMT_RGBA32:
3388 s->colorspace= 1;
3389 break;*/
3390 default:
3391 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3392 return -1;
3394 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3395 s->chroma_h_shift= 1;
3396 s->chroma_v_shift= 1;
3397 return 0;
3400 static int frame_start(SnowContext *s){
3401 AVFrame tmp;
3402 int w= s->avctx->width; //FIXME round up to x16 ?
3403 int h= s->avctx->height;
3405 if(s->current_picture.data[0]){
3406 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3407 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3408 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3411 tmp= s->last_picture;
3412 s->last_picture= s->current_picture;
3413 s->current_picture= tmp;
3415 s->current_picture.reference= 1;
3416 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3417 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3418 return -1;
3421 return 0;
3424 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3425 SnowContext *s = avctx->priv_data;
3426 RangeCoder * const c= &s->c;
3427 AVFrame *pict = data;
3428 const int width= s->avctx->width;
3429 const int height= s->avctx->height;
3430 int level, orientation, plane_index;
3432 ff_init_range_encoder(c, buf, buf_size);
3433 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3435 s->input_picture = *pict;
3437 if(avctx->flags&CODEC_FLAG_PASS2){
3438 s->m.pict_type =
3439 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3440 s->keyframe= pict->pict_type==FF_I_TYPE;
3441 s->m.picture_number= avctx->frame_number;
3442 pict->quality= ff_rate_estimate_qscale(&s->m);
3443 }else{
3444 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3445 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3448 if(pict->quality){
3449 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3450 //<64 >60
3451 s->qlog += 61*QROOT/8;
3452 }else{
3453 s->qlog= LOSSLESS_QLOG;
3456 frame_start(s);
3457 s->current_picture.key_frame= s->keyframe;
3459 s->m.current_picture_ptr= &s->m.current_picture;
3460 if(pict->pict_type == P_TYPE){
3461 int block_width = (width +15)>>4;
3462 int block_height= (height+15)>>4;
3463 int stride= s->current_picture.linesize[0];
3465 assert(s->current_picture.data[0]);
3466 assert(s->last_picture.data[0]);
3468 s->m.avctx= s->avctx;
3469 s->m.current_picture.data[0]= s->current_picture.data[0];
3470 s->m. last_picture.data[0]= s-> last_picture.data[0];
3471 s->m. new_picture.data[0]= s-> input_picture.data[0];
3472 s->m. last_picture_ptr= &s->m. last_picture;
3473 s->m.linesize=
3474 s->m. last_picture.linesize[0]=
3475 s->m. new_picture.linesize[0]=
3476 s->m.current_picture.linesize[0]= stride;
3477 s->m.uvlinesize= s->current_picture.linesize[1];
3478 s->m.width = width;
3479 s->m.height= height;
3480 s->m.mb_width = block_width;
3481 s->m.mb_height= block_height;
3482 s->m.mb_stride= s->m.mb_width+1;
3483 s->m.b8_stride= 2*s->m.mb_width+1;
3484 s->m.f_code=1;
3485 s->m.pict_type= pict->pict_type;
3486 s->m.me_method= s->avctx->me_method;
3487 s->m.me.scene_change_score=0;
3488 s->m.flags= s->avctx->flags;
3489 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3490 s->m.out_format= FMT_H263;
3491 s->m.unrestricted_mv= 1;
3493 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3494 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3495 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3497 s->m.dsp= s->dsp; //move
3498 ff_init_me(&s->m);
3501 redo_frame:
3503 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3505 encode_header(s);
3506 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3507 encode_blocks(s);
3508 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3510 for(plane_index=0; plane_index<3; plane_index++){
3511 Plane *p= &s->plane[plane_index];
3512 int w= p->width;
3513 int h= p->height;
3514 int x, y;
3515 // int bits= put_bits_count(&s->c.pb);
3517 //FIXME optimize
3518 if(pict->data[plane_index]) //FIXME gray hack
3519 for(y=0; y<h; y++){
3520 for(x=0; x<w; x++){
3521 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3524 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3526 if( plane_index==0
3527 && pict->pict_type == P_TYPE
3528 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3529 ff_init_range_encoder(c, buf, buf_size);
3530 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3531 pict->pict_type= FF_I_TYPE;
3532 s->keyframe=1;
3533 reset_contexts(s);
3534 goto redo_frame;
3537 if(s->qlog == LOSSLESS_QLOG){
3538 for(y=0; y<h; y++){
3539 for(x=0; x<w; x++){
3540 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3545 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3547 for(level=0; level<s->spatial_decomposition_count; level++){
3548 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3549 SubBand *b= &p->band[level][orientation];
3551 quantize(s, b, b->buf, b->stride, s->qbias);
3552 if(orientation==0)
3553 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3554 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3555 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3556 if(orientation==0)
3557 correlate(s, b, b->buf, b->stride, 1, 0);
3560 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3562 for(level=0; level<s->spatial_decomposition_count; level++){
3563 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3564 SubBand *b= &p->band[level][orientation];
3566 dequantize(s, b, b->buf, b->stride);
3570 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3571 if(s->qlog == LOSSLESS_QLOG){
3572 for(y=0; y<h; y++){
3573 for(x=0; x<w; x++){
3574 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3578 {START_TIMER
3579 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3580 STOP_TIMER("pred-conv")}
3581 if(s->avctx->flags&CODEC_FLAG_PSNR){
3582 int64_t error= 0;
3584 if(pict->data[plane_index]) //FIXME gray hack
3585 for(y=0; y<h; y++){
3586 for(x=0; x<w; x++){
3587 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3588 error += d*d;
3591 s->avctx->error[plane_index] += error;
3592 s->current_picture.error[plane_index] = error;
3596 if(s->last_picture.data[0])
3597 avctx->release_buffer(avctx, &s->last_picture);
3599 s->current_picture.coded_picture_number = avctx->frame_number;
3600 s->current_picture.pict_type = pict->pict_type;
3601 s->current_picture.quality = pict->quality;
3602 if(avctx->flags&CODEC_FLAG_PASS1){
3603 s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3604 s->m.current_picture.display_picture_number =
3605 s->m.current_picture.coded_picture_number = avctx->frame_number;
3606 s->m.pict_type = pict->pict_type;
3607 s->m.current_picture.quality = pict->quality;
3608 ff_write_pass1_stats(&s->m);
3610 if(avctx->flags&CODEC_FLAG_PASS2){
3611 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3614 emms_c();
3616 return ff_rac_terminate(c);
3619 static void common_end(SnowContext *s){
3620 int plane_index, level, orientation;
3622 av_freep(&s->spatial_dwt_buffer);
3624 av_freep(&s->m.me.scratchpad);
3625 av_freep(&s->m.me.map);
3626 av_freep(&s->m.me.score_map);
3628 av_freep(&s->block);
3630 for(plane_index=0; plane_index<3; plane_index++){
3631 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3632 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3633 SubBand *b= &s->plane[plane_index].band[level][orientation];
3635 av_freep(&b->x_coeff);
3641 static int encode_end(AVCodecContext *avctx)
3643 SnowContext *s = avctx->priv_data;
3645 common_end(s);
3646 av_free(avctx->stats_out);
3648 return 0;
3651 static int decode_init(AVCodecContext *avctx)
3653 SnowContext *s = avctx->priv_data;
3654 int block_size;
3656 avctx->pix_fmt= PIX_FMT_YUV420P;
3658 common_init(avctx);
3660 block_size = MB_SIZE >> s->block_max_depth;
3661 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 2)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
3663 return 0;
3666 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3667 SnowContext *s = avctx->priv_data;
3668 RangeCoder * const c= &s->c;
3669 int bytes_read;
3670 AVFrame *picture = data;
3671 int level, orientation, plane_index;
3673 ff_init_range_decoder(c, buf, buf_size);
3674 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3676 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3677 decode_header(s);
3678 if(!s->block) alloc_blocks(s);
3680 frame_start(s);
3681 //keyframe flag dupliaction mess FIXME
3682 if(avctx->debug&FF_DEBUG_PICT_INFO)
3683 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3685 decode_blocks(s);
3687 for(plane_index=0; plane_index<3; plane_index++){
3688 Plane *p= &s->plane[plane_index];
3689 int w= p->width;
3690 int h= p->height;
3691 int x, y;
3692 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3694 if(s->avctx->debug&2048){
3695 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3696 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3698 for(y=0; y<h; y++){
3699 for(x=0; x<w; x++){
3700 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3701 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3706 { START_TIMER
3707 for(level=0; level<s->spatial_decomposition_count; level++){
3708 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3709 SubBand *b= &p->band[level][orientation];
3710 unpack_coeffs(s, b, b->parent, orientation);
3713 STOP_TIMER("unpack coeffs");
3716 {START_TIMER
3717 const int mb_h= s->b_height << s->block_max_depth;
3718 const int block_size = MB_SIZE >> s->block_max_depth;
3719 const int block_w = plane_index ? block_size/2 : block_size;
3720 int mb_y;
3721 dwt_compose_t cs[MAX_DECOMPOSITIONS];
3722 int yd=0, yq=0;
3723 int y;
3724 int end_y;
3726 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3727 for(mb_y=0; mb_y<=mb_h; mb_y++){
3729 int slice_starty = block_w*mb_y;
3730 int slice_h = block_w*(mb_y+1);
3731 if (!(s->keyframe || s->avctx->debug&512)){
3732 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
3733 slice_h -= (block_w >> 1);
3737 START_TIMER
3738 for(level=0; level<s->spatial_decomposition_count; level++){
3739 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3740 SubBand *b= &p->band[level][orientation];
3741 int start_y;
3742 int end_y;
3743 int our_mb_start = mb_y;
3744 int our_mb_end = (mb_y + 1);
3745 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2: 0);
3746 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2);
3747 if (!(s->keyframe || s->avctx->debug&512)){
3748 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3749 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3751 start_y = FFMIN(b->height, start_y);
3752 end_y = FFMIN(b->height, end_y);
3754 if (start_y != end_y){
3755 if (orientation == 0){
3756 SubBand * correlate_band = &p->band[0][0];
3757 int correlate_end_y = FFMIN(b->height, end_y + 1);
3758 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
3759 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
3760 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
3761 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
3763 else
3764 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3768 STOP_TIMER("decode_subband_slice");
3771 { START_TIMER
3772 for(; yd<slice_h; yd+=4){
3773 ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3775 STOP_TIMER("idwt slice");}
3778 if(s->qlog == LOSSLESS_QLOG){
3779 for(; yq<slice_h && yq<h; yq++){
3780 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3781 for(x=0; x<w; x++){
3782 line[x] <<= FRAC_BITS;
3787 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3789 y = FFMIN(p->height, slice_starty);
3790 end_y = FFMIN(p->height, slice_h);
3791 while(y < end_y)
3792 slice_buffer_release(&s->sb, y++);
3795 slice_buffer_flush(&s->sb);
3797 STOP_TIMER("idwt + predict_slices")}
3800 emms_c();
3802 if(s->last_picture.data[0])
3803 avctx->release_buffer(avctx, &s->last_picture);
3805 if(!(s->avctx->debug&2048))
3806 *picture= s->current_picture;
3807 else
3808 *picture= s->mconly_picture;
3810 *data_size = sizeof(AVFrame);
3812 bytes_read= c->bytestream - c->bytestream_start;
3813 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3815 return bytes_read;
3818 static int decode_end(AVCodecContext *avctx)
3820 SnowContext *s = avctx->priv_data;
3822 slice_buffer_destroy(&s->sb);
3824 common_end(s);
3826 return 0;
3829 AVCodec snow_decoder = {
3830 "snow",
3831 CODEC_TYPE_VIDEO,
3832 CODEC_ID_SNOW,
3833 sizeof(SnowContext),
3834 decode_init,
3835 NULL,
3836 decode_end,
3837 decode_frame,
3838 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3839 NULL
3842 #ifdef CONFIG_ENCODERS
3843 AVCodec snow_encoder = {
3844 "snow",
3845 CODEC_TYPE_VIDEO,
3846 CODEC_ID_SNOW,
3847 sizeof(SnowContext),
3848 encode_init,
3849 encode_frame,
3850 encode_end,
3852 #endif
3855 #if 0
3856 #undef malloc
3857 #undef free
3858 #undef printf
3860 int main(){
3861 int width=256;
3862 int height=256;
3863 int buffer[2][width*height];
3864 SnowContext s;
3865 int i;
3866 s.spatial_decomposition_count=6;
3867 s.spatial_decomposition_type=1;
3869 printf("testing 5/3 DWT\n");
3870 for(i=0; i<width*height; i++)
3871 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3873 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3874 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3876 for(i=0; i<width*height; i++)
3877 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3879 printf("testing 9/7 DWT\n");
3880 s.spatial_decomposition_type=0;
3881 for(i=0; i<width*height; i++)
3882 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3884 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3885 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3887 for(i=0; i<width*height; i++)
3888 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3890 printf("testing AC coder\n");
3891 memset(s.header_state, 0, sizeof(s.header_state));
3892 ff_init_range_encoder(&s.c, buffer[0], 256*256);
3893 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3895 for(i=-256; i<256; i++){
3896 START_TIMER
3897 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3898 STOP_TIMER("put_symbol")
3900 ff_rac_terminate(&s.c);
3902 memset(s.header_state, 0, sizeof(s.header_state));
3903 ff_init_range_decoder(&s.c, buffer[0], 256*256);
3904 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3906 for(i=-256; i<256; i++){
3907 int j;
3908 START_TIMER
3909 j= get_symbol(&s.c, s.header_state, 1);
3910 STOP_TIMER("get_symbol")
3911 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3914 int level, orientation, x, y;
3915 int64_t errors[8][4];
3916 int64_t g=0;
3918 memset(errors, 0, sizeof(errors));
3919 s.spatial_decomposition_count=3;
3920 s.spatial_decomposition_type=0;
3921 for(level=0; level<s.spatial_decomposition_count; level++){
3922 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3923 int w= width >> (s.spatial_decomposition_count-level);
3924 int h= height >> (s.spatial_decomposition_count-level);
3925 int stride= width << (s.spatial_decomposition_count-level);
3926 DWTELEM *buf= buffer[0];
3927 int64_t error=0;
3929 if(orientation&1) buf+=w;
3930 if(orientation>1) buf+=stride>>1;
3932 memset(buffer[0], 0, sizeof(int)*width*height);
3933 buf[w/2 + h/2*stride]= 256*256;
3934 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3935 for(y=0; y<height; y++){
3936 for(x=0; x<width; x++){
3937 int64_t d= buffer[0][x + y*width];
3938 error += d*d;
3939 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3941 if(ABS(height/2-y)<9 && level==2) printf("\n");
3943 error= (int)(sqrt(error)+0.5);
3944 errors[level][orientation]= error;
3945 if(g) g=ff_gcd(g, error);
3946 else g= error;
3949 printf("static int const visual_weight[][4]={\n");
3950 for(level=0; level<s.spatial_decomposition_count; level++){
3951 printf(" {");
3952 for(orientation=0; orientation<4; orientation++){
3953 printf("%8lld,", errors[level][orientation]/g);
3955 printf("},\n");
3957 printf("};\n");
3959 int level=2;
3960 int orientation=3;
3961 int w= width >> (s.spatial_decomposition_count-level);
3962 int h= height >> (s.spatial_decomposition_count-level);
3963 int stride= width << (s.spatial_decomposition_count-level);
3964 DWTELEM *buf= buffer[0];
3965 int64_t error=0;
3967 buf+=w;
3968 buf+=stride>>1;
3970 memset(buffer[0], 0, sizeof(int)*width*height);
3971 #if 1
3972 for(y=0; y<height; y++){
3973 for(x=0; x<width; x++){
3974 int tab[4]={0,2,3,1};
3975 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3978 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3979 #else
3980 for(y=0; y<h; y++){
3981 for(x=0; x<w; x++){
3982 buf[x + y*stride ]=169;
3983 buf[x + y*stride-w]=64;
3986 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3987 #endif
3988 for(y=0; y<height; y++){
3989 for(x=0; x<width; x++){
3990 int64_t d= buffer[0][x + y*width];
3991 error += d*d;
3992 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3994 if(ABS(height/2-y)<9) printf("\n");
3999 return 0;
4001 #endif