aarch64: Add assembly support for -fsanitize=hwaddress tagged globals.
[libav.git] / libavcodec / svq3.c
blob667d3906a1fedcb54518b237a1082bb91c0ebb15
1 /*
2 * Copyright (c) 2003 The Libav Project
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains 1 or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
43 #include <inttypes.h>
45 #include "libavutil/attributes.h"
47 #include "bitstream.h"
48 #include "golomb.h"
49 #include "internal.h"
50 #include "avcodec.h"
51 #include "mpegutils.h"
52 #include "h264dec.h"
53 #include "h264data.h"
54 #include "hpeldsp.h"
55 #include "mathops.h"
56 #include "rectangle.h"
57 #include "tpeldsp.h"
59 #if CONFIG_ZLIB
60 #include <zlib.h>
61 #endif
63 #include "svq1.h"
65 /**
66 * @file
67 * svq3 decoder.
/**
 * Per-picture decoder state: the decoded frame plus the per-block side
 * data (motion vectors, MB types, reference indices) that later frames
 * predict from.  The *_buf members are the AVBufferRef backings of the
 * corresponding data pointers.
 */
typedef struct SVQ3Frame {
    AVFrame *f;                      // decoded picture

    AVBufferRef *motion_val_buf[2];  // backing for motion_val[0]/[1]
    int16_t     (*motion_val[2])[2]; // per-4x4-block MVs, one array per prediction direction

    AVBufferRef *mb_type_buf;        // backing for mb_type
    uint32_t    *mb_type;            // per-MB type flags (MB_TYPE_*)

    AVBufferRef *ref_index_buf[2];   // backing for ref_index[0]/[1]
    int8_t      *ref_index[2];       // per-block reference indices
} SVQ3Frame;
/**
 * Decoder context.  SVQ3 is an early H.264 draft variant, so much of the
 * layout (scan8-ordered caches, availability masks) mirrors the H.264
 * decoder this file borrows helpers from.
 */
typedef struct SVQ3Context {
    AVCodecContext *avctx;

    /* DSP helpers shared with / borrowed from the H.264 decoder */
    H264DSPContext h264dsp;
    H264PredContext hpc;
    HpelDSPContext hdsp;
    TpelDSPContext tdsp;
    VideoDSPContext vdsp;

    SVQ3Frame *cur_pic;        // frame being decoded
    SVQ3Frame *next_pic;       // future reference (B-frame / skip prediction)
    SVQ3Frame *last_pic;       // past reference
    BitstreamContext bc;       // reader over the whole input packet
    BitstreamContext bc_slice; // reader over the extracted (de-watermarked) slice copy
    uint8_t *slice_buf;        // av_fast_malloc'ed copy of the current slice
    int slice_size;
    int halfpel_flag;
    int thirdpel_flag;
    int unknown_flag;
    uint32_t watermark_key;    // XOR key applied to watermarked slice data
    int adaptive_quant;
    int next_p_frame_damaged;
    int h_edge_pos;            // frame edges in luma pixels, used for MV clipping
    int v_edge_pos;
    int last_frame_output;
    int slice_num;
    int qscale;
    int cbp;                   // coded block pattern of the current MB
    int frame_num;
    int frame_num_offset;
    int prev_frame_num_offset;
    int prev_frame_num;

    enum AVPictureType pict_type;
    int low_delay;

    /* current macroblock position / frame geometry */
    int mb_x, mb_y;
    int mb_xy;
    int mb_width, mb_height;
    int mb_stride, mb_num;
    int b_stride;              // stride of the per-4x4-block arrays

    uint32_t *mb2br_xy;        // MB index -> index into intra4x4_pred_mode storage

    int chroma_pred_mode;
    int intra16x16_pred_mode;

    int8_t intra4x4_pred_mode_cache[5 * 8]; // scan8-ordered cache for the current MB
    int8_t (*intra4x4_pred_mode);           // frame-level storage, -1 marks "unavailable"

    /* neighbour-availability bitmasks for intra prediction (H.264 layout) */
    unsigned int top_samples_available;
    unsigned int topright_samples_available;
    unsigned int left_samples_available;

    uint8_t *edge_emu_buffer;  // scratch for motion compensation over frame edges

    /* per-MB caches laid out in scan8 order, as in the H.264 decoder */
    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
    DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
    DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];           // residual coefficients
    DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];     // separately coded luma DC
    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
    uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
    int block_offset[2 * (16 * 3)];  // per-block pixel offsets into the MB
} SVQ3Context;
/* motion compensation precision / derivation modes for svq3_mc_dir() */
#define FULLPEL_MODE  1
#define HALFPEL_MODE  2
#define THIRDPEL_MODE 3
#define PREDICT_MODE  4

/* dual scan (from some older H.264 draft)
 * o-->o-->o   o
 *         |  /|
 * o   o   o / o
 * | / |   |/  |
 * o   o   o   o
 *   /
 * o-->o-->o-->o
 */
static const uint8_t svq3_scan[16] = {
    0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
    2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
    0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
    0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
};

/* scan order of the 16 luma DC coefficients inside the MB coefficient plane */
static const uint8_t luma_dc_zigzag_scan[16] = {
    0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
    3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
    1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
    3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
};

/* intra 4x4 prediction VLC (0..24) -> pair of candidate-list indices,
 * enumerated along anti-diagonals */
static const uint8_t svq3_pred_0[25][2] = {
    { 0, 0 },
    { 1, 0 }, { 0, 1 },
    { 0, 2 }, { 1, 1 }, { 2, 0 },
    { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
    { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
    { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
    { 2, 4 }, { 3, 3 }, { 4, 2 },
    { 4, 3 }, { 3, 4 },
    { 4, 4 }
};

/* [top mode + 1][left mode + 1][candidate] -> intra 4x4 mode, -1 = invalid */
static const int8_t svq3_pred_1[6][6][5] = {
    { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
      { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
    { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
      { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
    { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
      { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
    { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
      { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
    { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
      { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
    { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
      { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
};

/* run/level pairs for short residual VLC codes; first index selects the
 * intra (1) or inter (0) table, second is the VLC code (0..15) */
static const struct {
    uint8_t run;
    uint8_t level;
} svq3_dct_tables[2][16] = {
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
      { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
      { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
};
/* dequantization multipliers indexed by qp (0..31) */
static const uint32_t svq3_dequant_coeff[32] = {
     3881,  4351,  4890,  5481,   6154,   6914,   7761,   8718,
     9781, 10987, 12339, 13828,  15523,  17435,  19561,  21873,
    24552, 27656, 30847, 34870,  38807,  43747,  49103,  54683,
    61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533,
};

/**
 * Dequantize and inverse-transform the 4x4 luma DC block.
 *
 * @param output destination coefficient plane; the 16 results are scattered
 *               to the DC slot of each 4x4 luma block via x_offset/stride
 * @param input  the 16 luma DC coefficients in raster order
 * @param qp     quantizer (0..31), selects the multiplier from
 *               svq3_dequant_coeff
 */
static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
{
#define stride 16
    /* qmul is unsigned so the dequant multiply below cannot trigger
     * signed-overflow UB on crafted bitstreams; unsigned arithmetic wraps
     * mod 2^32, and the (int) cast before the shift restores the
     * sign-propagating (arithmetic) right shift of the original code. */
    const unsigned qmul = svq3_dequant_coeff[qp];
    int i;
    int temp[16];
    static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };

    /* horizontal 1-D transform (13/7/17 integer approximation) */
    for (i = 0; i < 4; i++) {
        const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
        const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
        const int z2 =  7 * input[4 * i + 1] - 17 * input[4 * i + 3];
        const int z3 = 17 * input[4 * i + 1] +  7 * input[4 * i + 3];

        temp[4 * i + 0] = z0 + z3;
        temp[4 * i + 1] = z1 + z2;
        temp[4 * i + 2] = z1 - z2;
        temp[4 * i + 3] = z0 - z3;
    }

    /* vertical pass, dequantize, round (0x80000 = 0.5 in Q20) and scatter */
    for (i = 0; i < 4; i++) {
        const int offset = x_offset[i];
        const int z0     = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
        const int z1     = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
        const int z2     =  7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
        const int z3     = 17 * temp[4 * 1 + i] +  7 * temp[4 * 3 + i];

        output[stride *  0 + offset] = (int)((z0 + z3) * qmul + 0x80000) >> 20;
        output[stride *  2 + offset] = (int)((z1 + z2) * qmul + 0x80000) >> 20;
        output[stride *  8 + offset] = (int)((z1 - z2) * qmul + 0x80000) >> 20;
        output[stride * 10 + offset] = (int)((z0 - z3) * qmul + 0x80000) >> 20;
    }
#undef stride
}
256 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
257 int stride, int qp, int dc)
259 const int qmul = svq3_dequant_coeff[qp];
260 int i;
262 if (dc) {
263 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
264 : qmul * (block[0] >> 3) / 2);
265 block[0] = 0;
268 for (i = 0; i < 4; i++) {
269 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
270 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
271 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
272 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
274 block[0 + 4 * i] = z0 + z3;
275 block[1 + 4 * i] = z1 + z2;
276 block[2 + 4 * i] = z1 - z2;
277 block[3 + 4 * i] = z0 - z3;
280 for (i = 0; i < 4; i++) {
281 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
282 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
283 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
284 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
285 const int rr = (dc + 0x80000);
287 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
288 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
289 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
290 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
293 memset(block, 0, 16 * sizeof(int16_t));
/**
 * Decode one run/level coded residual block from the slice bitstream.
 *
 * @param bc    bitstream reader positioned at the block's first code
 * @param block output coefficient array; values are written at the scan
 *              positions (caller is expected to have zeroed it)
 * @param index first scan position to fill (e.g. 1 when DC is coded apart)
 * @param type  selects scan pattern and code tables; see scan_patterns[]
 *              (presumably 0 = luma DC, 1 = standard zigzag, 2 = SVQ3 dual
 *              scan, 3 = chroma DC — matches the call sites in this file)
 * @return 0 on success, -1 when a run overshoots the block boundary
 */
static inline int svq3_decode_block(BitstreamContext *bc, int16_t *block,
                                    int index, const int type)
{
    static const uint8_t *const scan_patterns[4] = {
        luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
    };

    int run, level, limit;
    unsigned vlc;
    const int intra = 3 * type >> 2;  // 0 for types 0/1, 1 for type 2, 2 for type 3
    const uint8_t *const scan = scan_patterns[type];

    /* type 2 makes a second pass over the extended scan (dual scan) */
    for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
        for (; (vlc = get_interleaved_ue_golomb(bc)) != 0; index++) {
            int sign = (vlc & 1) ? 0 : -1;  // odd codes are positive
            vlc = vlc + 1 >> 1;

            if (type == 3) {
                /* chroma DC: small hand-rolled code table */
                if (vlc < 3) {
                    run   = 0;
                    level = vlc;
                } else if (vlc < 4) {
                    run   = 1;
                    level = 1;
                } else {
                    run   = vlc & 0x3;
                    level = (vlc + 9 >> 2) - run;
                }
            } else {
                if (vlc < 16) {
                    run   = svq3_dct_tables[intra][vlc].run;
                    level = svq3_dct_tables[intra][vlc].level;
                } else if (intra) {
                    /* escape: run in low 3 bits, level derived from the rest */
                    run   = vlc & 0x7;
                    level = (vlc >> 3) +
                            ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
                } else {
                    /* escape: run in low 4 bits */
                    run   = vlc & 0xF;
                    level = (vlc >> 4) +
                            ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
                }
            }

            if ((index += run) >= limit)
                return -1;

            block[scan[index]] = (level ^ sign) - sign;  // apply sign
        }

        if (type != 2) {
            break;
        }
    }

    return 0;
}
353 static av_always_inline int
354 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
355 int i, int list, int part_width)
357 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
359 if (topright_ref != PART_NOT_AVAILABLE) {
360 *C = s->mv_cache[list][i - 8 + part_width];
361 return topright_ref;
362 } else {
363 *C = s->mv_cache[list][i - 8 - 1];
364 return s->ref_cache[list][i - 8 - 1];
/**
 * Get the predicted MV (median prediction over the left, top and
 * diagonal neighbours, as in H.264).
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param list prediction list (0 or 1)
 * @param ref reference index the neighbours are matched against
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
                                              int part_width, int list,
                                              int ref, int *const mx, int *const my)
{
    const int index8       = scan8[n];
    const int top_ref      = s->ref_cache[list][index8 - 8];
    const int left_ref     = s->ref_cache[list][index8 - 1];
    const int16_t *const A = s->mv_cache[list][index8 - 1];  // left neighbour
    const int16_t *const B = s->mv_cache[list][index8 - 8];  // top neighbour
    const int16_t *C;                                        // diagonal neighbour
    int diagonal_ref, match_count;

    /* mv_cache
     * B . . A T T T T
     * U . . L . . , .
     * U . . L . . . .
     * U . . L . . , .
     * . . . L . . . .
     */

    diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
    match_count  = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
    if (match_count > 1) { // most common: median of the three neighbours
        *mx = mid_pred(A[0], B[0], C[0]);
        *my = mid_pred(A[1], B[1], C[1]);
    } else if (match_count == 1) {
        /* exactly one neighbour uses this reference: copy its MV */
        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
        } else if (top_ref == ref) {
            *mx = B[0];
            *my = B[1];
        } else {
            *mx = C[0];
            *my = C[1];
        }
    } else {
        /* only the left neighbour exists -> use it; otherwise median */
        if (top_ref      == PART_NOT_AVAILABLE &&
            diagonal_ref == PART_NOT_AVAILABLE &&
            left_ref     != PART_NOT_AVAILABLE) {
            *mx = A[0];
            *my = A[1];
        } else {
            *mx = mid_pred(A[0], B[0], C[0]);
            *my = mid_pred(A[1], B[1], C[1]);
        }
    }
}
/**
 * Motion-compensate one luma partition (and, unless GRAY is set, the two
 * corresponding chroma half-size blocks) from a reference frame into the
 * current picture.
 *
 * @param x,y          luma position of the partition inside the frame
 * @param width,height partition size in luma pixels
 * @param mx,my        motion vector relative to (x, y), in integer pixels
 * @param dxy          sub-pel interpolation index for the DSP function
 * @param thirdpel     nonzero selects the tpel DSP, else hpel
 * @param dir          0: predict from last_pic, else from next_pic
 * @param avg          nonzero averages into dest (bidirectional blocks)
 */
static inline void svq3_mc_dir_part(SVQ3Context *s,
                                    int x, int y, int width, int height,
                                    int mx, int my, int dxy,
                                    int thirdpel, int dir, int avg)
{
    const SVQ3Frame *pic = (dir == 0) ? s->last_pic : s->next_pic;
    uint8_t *src, *dest;
    int i, emu = 0;
    int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
    int linesize   = s->cur_pic->f->linesize[0];
    int uvlinesize = s->cur_pic->f->linesize[1];

    mx += x;
    my += y;

    /* partitions reaching outside the frame go through the edge emulator */
    if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
        my < 0 || my >= s->v_edge_pos - height - 1) {
        emu = 1;
        mx  = av_clip(mx, -16, s->h_edge_pos - width + 15);
        my  = av_clip(my, -16, s->v_edge_pos - height + 15);
    }

    /* form component predictions */
    dest = s->cur_pic->f->data[0] + x + y * linesize;
    src  = pic->f->data[0] + mx + my * linesize;

    if (emu) {
        /* +1 in each dimension: the sub-pel filters read one extra row/col */
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
                                 linesize, linesize,
                                 width + 1, height + 1,
                                 mx, my, s->h_edge_pos, s->v_edge_pos);
        src = s->edge_emu_buffer;
    }
    if (thirdpel)
        (avg ? s->tdsp.avg_tpel_pixels_tab
             : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
                                                 width, height);
    else
        (avg ? s->hdsp.avg_pixels_tab
             : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
                                                       height);

    if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
        /* halve position and size for 4:2:0 chroma, rounding the MV */
        mx        = mx + (mx < (int) x) >> 1;
        my        = my + (my < (int) y) >> 1;
        width     = width >> 1;
        height    = height >> 1;
        blocksize++;

        for (i = 1; i < 3; i++) {
            dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
            src  = pic->f->data[i] + mx + my * uvlinesize;

            if (emu) {
                s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
                                         uvlinesize, uvlinesize,
                                         width + 1, height + 1,
                                         mx, my, (s->h_edge_pos >> 1),
                                         s->v_edge_pos >> 1);
                src = s->edge_emu_buffer;
            }
            if (thirdpel)
                (avg ? s->tdsp.avg_tpel_pixels_tab
                     : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
                                                         uvlinesize,
                                                         width, height);
            else
                (avg ? s->hdsp.avg_pixels_tab
                     : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
                                                               uvlinesize,
                                                               height);
        }
    }
}
/**
 * Decode (or derive) the motion vectors for every partition of the
 * current macroblock and run motion compensation for each, updating the
 * mv_cache and the frame-level motion_val array.
 *
 * MVs are handled internally in 1/6-pel units (hence the * 6 factors).
 *
 * @param size partition-layout selector; determines part_width/part_height
 * @param mode FULLPEL_MODE / HALFPEL_MODE / THIRDPEL_MODE, or PREDICT_MODE
 *             to scale the co-located MVs of next_pic instead of reading
 *             differentials from the bitstream
 * @param dir  prediction direction/list (0 = forward, 1 = backward)
 * @param avg  nonzero averages into the existing prediction
 * @return 0 on success, -1 on an invalid MV VLC
 */
static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
                              int dir, int avg)
{
    int i, j, k, mx, my, dx, dy, x, y;
    const int part_width  = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
    const int part_height = 16 >> ((unsigned)(size + 1) / 3);
    const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
    const int h_edge_pos  = 6 * (s->h_edge_pos - part_width) - extra_width;
    const int v_edge_pos  = 6 * (s->v_edge_pos - part_height) - extra_width;

    for (i = 0; i < 16; i += part_height)
        for (j = 0; j < 16; j += part_width) {
            const int b_xy = (4 * s->mb_x + (j >> 2)) +
                             (4 * s->mb_y + (i >> 2)) * s->b_stride;
            int dxy;
            x = 16 * s->mb_x + j;
            y = 16 * s->mb_y + i;
            /* scan8-style block index of this partition's top-left 4x4 block */
            k = (j >> 2 & 1) + (i >> 1 & 2) +
                (j >> 1 & 4) + (i & 8);

            if (mode != PREDICT_MODE) {
                svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
            } else {
                /* derive from the co-located MV of next_pic, scaled by the
                 * temporal distance (as in H.264 temporal direct mode) */
                mx = s->next_pic->motion_val[0][b_xy][0] << 1;
                my = s->next_pic->motion_val[0][b_xy][1] << 1;

                if (dir == 0) {
                    mx = mx * s->frame_num_offset /
                         s->prev_frame_num_offset + 1 >> 1;
                    my = my * s->frame_num_offset /
                         s->prev_frame_num_offset + 1 >> 1;
                } else {
                    mx = mx * (s->frame_num_offset - s->prev_frame_num_offset) /
                         s->prev_frame_num_offset + 1 >> 1;
                    my = my * (s->frame_num_offset - s->prev_frame_num_offset) /
                         s->prev_frame_num_offset + 1 >> 1;
                }
            }

            /* clip motion vector prediction to frame border */
            mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
            my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);

            /* get (optional) motion vector differential */
            if (mode == PREDICT_MODE) {
                dx = dy = 0;
            } else {
                dy = get_interleaved_se_golomb(&s->bc_slice);
                dx = get_interleaved_se_golomb(&s->bc_slice);

                if (dx == INVALID_VLC || dy == INVALID_VLC) {
                    av_log(s->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
                    return -1;
                }
            }

            /* compute motion vector (prediction is rounded from 1/6-pel to
             * the mode's precision, the differential added, then the result
             * converted back to 1/6-pel units) */
            if (mode == THIRDPEL_MODE) {
                int fx, fy;
                mx  = (mx + 1 >> 1) + dx;
                my  = (my + 1 >> 1) + dy;
                fx  = (unsigned)(mx + 0x3000) / 3 - 0x1000;
                fy  = (unsigned)(my + 0x3000) / 3 - 0x1000;
                dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);

                svq3_mc_dir_part(s, x, y, part_width, part_height,
                                 fx, fy, dxy, 1, dir, avg);
                mx += mx;
                my += my;
            } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
                mx  = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
                my  = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
                dxy = (mx & 1) + 2 * (my & 1);

                svq3_mc_dir_part(s, x, y, part_width, part_height,
                                 mx >> 1, my >> 1, dxy, 0, dir, avg);
                mx *= 3;
                my *= 3;
            } else {
                mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
                my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;

                svq3_mc_dir_part(s, x, y, part_width, part_height,
                                 mx, my, 0, 0, dir, avg);
                mx *= 6;
                my *= 6;
            }

            /* update mv_cache */
            if (mode != PREDICT_MODE) {
                int32_t mv = pack16to32(mx, my);

                if (part_height == 8 && i < 8) {
                    AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);

                    if (part_width == 8 && j < 8)
                        AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
                }
                if (part_width == 8 && j < 8)
                    AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
                if (part_width == 4 || part_height == 4)
                    AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
            }

            /* write back motion vectors */
            fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
                           part_width >> 2, part_height >> 2, s->b_stride,
                           pack16to32(mx, my), 4);
        }

    return 0;
}
/**
 * Add the luma residuals of a non-intra4x4 macroblock to the prediction.
 * Intra4x4 blocks are handled in hl_decode_mb_predict_luma() instead,
 * interleaved with the per-block prediction.
 */
static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
                                                    int mb_type, const int *block_offset,
                                                    int linesize, uint8_t *dest_y)
{
    int i;
    if (!IS_INTRA4x4(mb_type)) {
        for (i = 0; i < 16; i++)
            /* skip blocks with neither AC (nnz) nor DC coefficients */
            if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
                uint8_t *const ptr = dest_y + block_offset[i];
                svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
                                s->qscale, IS_INTRA(mb_type) ? 1 : 0);
            }
    }
}
/**
 * Run luma intra prediction for an intra macroblock and, for intra4x4,
 * add each block's residual right after predicting it (later blocks
 * predict from the already-reconstructed earlier ones).  For intra16x16
 * only the prediction and the separate luma DC transform happen here;
 * the AC residuals are added by hl_decode_mb_idct_luma().
 */
static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
                                                       int mb_type,
                                                       const int *block_offset,
                                                       int linesize,
                                                       uint8_t *dest_y)
{
    int i;
    int qscale = s->qscale;

    if (IS_INTRA4x4(mb_type)) {
        for (i = 0; i < 16; i++) {
            uint8_t *const ptr = dest_y + block_offset[i];
            const int dir      = s->intra4x4_pred_mode_cache[scan8[i]];

            uint8_t *topright;
            int nnz, tr;
            if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
                const int topright_avail = (s->topright_samples_available << i) & 0x8000;
                assert(s->mb_y || linesize <= block_offset[i]);
                if (!topright_avail) {
                    /* replicate the last available top pixel 4 times */
                    tr       = ptr[3 - linesize] * 0x01010101u;
                    topright = (uint8_t *)&tr;
                } else
                    topright = ptr + 4 - linesize;
            } else
                topright = NULL;

            s->hpc.pred4x4[dir](ptr, topright, linesize);
            nnz = s->non_zero_count_cache[scan8[i]];
            if (nnz) {
                svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
            }
        }
    } else {
        s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
        svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
    }
}
/**
 * Reconstruct the current macroblock into cur_pic: chroma/luma intra
 * prediction (for intra MBs), then luma residuals, then chroma DC
 * dequant and chroma residuals if the coded block pattern has chroma.
 */
static void hl_decode_mb(SVQ3Context *s)
{
    const int mb_x    = s->mb_x;
    const int mb_y    = s->mb_y;
    const int mb_xy   = s->mb_xy;
    const int mb_type = s->cur_pic->mb_type[mb_xy];
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize;
    int i, j;
    const int *block_offset = &s->block_offset[0];
    const int block_h       = 16 >> 1;  // chroma MB height (4:2:0)

    linesize   = s->cur_pic->f->linesize[0];
    uvlinesize = s->cur_pic->f->linesize[1];

    dest_y  = s->cur_pic->f->data[0] + (mb_x + mb_y * linesize) * 16;
    dest_cb = s->cur_pic->f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
    dest_cr = s->cur_pic->f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;

    s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
    s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);

    if (IS_INTRA(mb_type)) {
        s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
        s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);

        hl_decode_mb_predict_luma(s, mb_type, block_offset, linesize, dest_y);
    }

    hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);

    if (s->cbp & 0x30) {  // any chroma coefficients coded
        uint8_t *dest[2] = { dest_cb, dest_cr };
        s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
                                               s->dequant4_coeff[4][0]);
        s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
                                               s->dequant4_coeff[4][0]);
        for (j = 1; j < 3; j++) {
            for (i = j * 16; i < j * 16 + 4; i++)
                if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
                    uint8_t *const ptr = dest[j - 1] + block_offset[i];
                    svq3_add_idct_c(ptr, s->mb + i * 16,
                                    uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                }
        }
    }
}
/**
 * Decode one macroblock: interpret the raw mb_type code (0 = skip/direct,
 * 1..7 = inter with varying partitioning, 8 and 33 = intra 4x4 variants,
 * others = intra 16x16), decode motion and prediction-mode information,
 * run motion compensation for inter blocks, and read the residual
 * coefficients into s->mb / s->mb_luma_dc.  Pixel reconstruction happens
 * later in hl_decode_mb().
 *
 * @param mb_type raw macroblock type code from the slice bitstream
 * @return 0 on success, negative on bitstream errors
 */
static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
{
    int i, j, k, m, dir, mode;
    int cbp = 0;
    uint32_t vlc;
    int8_t *top, *left;
    const int mb_xy = s->mb_xy;
    const int b_xy  = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;

    /* availability masks for a fully-inside MB; edges clear some bits */
    s->top_samples_available      = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
    s->left_samples_available     = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
    s->topright_samples_available = 0xFFFF;

    if (mb_type == 0) {           /* SKIP */
        if (s->pict_type == AV_PICTURE_TYPE_P ||
            s->next_pic->mb_type[mb_xy] == -1) {
            /* plain zero-MV copy skip */
            svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
                             0, 0, 0, 0, 0, 0);

            if (s->pict_type == AV_PICTURE_TYPE_B)
                svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
                                 0, 0, 0, 0, 1, 1);

            mb_type = MB_TYPE_SKIP;
        } else {
            /* B-frame direct mode: reuse next_pic's partitioning and MVs */
            mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
            if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
                return -1;
            if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
                return -1;

            mb_type = MB_TYPE_16x16;
        }
    } else if (mb_type < 8) {     /* INTER */
        /* one flag selects the finer of the advertised precisions */
        if (s->thirdpel_flag && s->halfpel_flag == !bitstream_read_bit(&s->bc_slice))
            mode = THIRDPEL_MODE;
        else if (s->halfpel_flag &&
                 s->thirdpel_flag == !bitstream_read_bit(&s->bc_slice))
            mode = HALFPEL_MODE;
        else
            mode = FULLPEL_MODE;

        /* fill caches */
        /* note ref_cache should contain here:
         * ????????
         * ???11111
         * N??11111
         * N??11111
         * N??11111
         */

        for (m = 0; m < 2; m++) {
            /* left neighbour MVs; -1 in intra4x4_pred_mode marks an
             * invalidated (other-slice or intra) neighbour */
            if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
                for (i = 0; i < 4; i++)
                    AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
                              s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
            } else {
                for (i = 0; i < 4; i++)
                    AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
            }
            if (s->mb_y > 0) {
                /* top row of neighbour MVs and their availability */
                memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
                       s->cur_pic->motion_val[m][b_xy - s->b_stride],
                       4 * 2 * sizeof(int16_t));
                memset(&s->ref_cache[m][scan8[0] - 1 * 8],
                       (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);

                if (s->mb_x < s->mb_width - 1) {
                    /* top-right neighbour */
                    AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
                              s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
                    s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
                        (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
                         s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
                } else
                    s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
                if (s->mb_x > 0) {
                    /* top-left neighbour */
                    AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
                              s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
                    s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
                        (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
                } else
                    s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
            } else
                memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
                       PART_NOT_AVAILABLE, 8);

            if (s->pict_type != AV_PICTURE_TYPE_B)
                break;  // only list 0 needed outside B frames
        }

        /* decode motion vector(s) and form prediction(s) */
        if (s->pict_type == AV_PICTURE_TYPE_P) {
            if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
                return -1;
        } else {        /* AV_PICTURE_TYPE_B */
            if (mb_type != 2) {
                if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
                    return -1;
            } else {
                for (i = 0; i < 4; i++)
                    memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
                           0, 4 * 2 * sizeof(int16_t));
            }
            if (mb_type != 1) {
                if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
                    return -1;
            } else {
                for (i = 0; i < 4; i++)
                    memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
                           0, 4 * 2 * sizeof(int16_t));
            }
        }

        mb_type = MB_TYPE_16x16;
    } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
        int8_t *i4x4       = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
        int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;

        memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));

        if (mb_type == 8) {
            /* load the neighbouring prediction modes into the cache */
            if (s->mb_x > 0) {
                for (i = 0; i < 4; i++)
                    s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
                if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
                    s->left_samples_available = 0x5F5F;
            }
            if (s->mb_y > 0) {
                s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
                s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
                s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
                s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];

                if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
                    s->top_samples_available = 0x33FF;
            }

            /* decode prediction codes for luma blocks (one VLC per pair) */
            for (i = 0; i < 16; i += 2) {
                vlc = get_interleaved_ue_golomb(&s->bc_slice);

                if (vlc >= 25) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "luma prediction:%"PRIu32"\n", vlc);
                    return -1;
                }

                left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
                top  = &s->intra4x4_pred_mode_cache[scan8[i] - 8];

                left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
                left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];

                if (left[1] == -1 || left[2] == -1) {
                    av_log(s->avctx, AV_LOG_ERROR, "weird prediction\n");
                    return -1;
                }
            }
        } else {        /* mb_type == 33, DC_128_PRED block type */
            for (i = 0; i < 4; i++)
                memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
        }

        /* store the right/bottom edge modes for future neighbours */
        AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
        i4x4[4] = i4x4_cache[7 + 8 * 3];
        i4x4[5] = i4x4_cache[7 + 8 * 2];
        i4x4[6] = i4x4_cache[7 + 8 * 1];

        if (mb_type == 8) {
            ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
                                             s->avctx, s->top_samples_available,
                                             s->left_samples_available);

            s->top_samples_available  = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
            s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
        } else {
            for (i = 0; i < 4; i++)
                memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);

            s->top_samples_available  = 0x33FF;
            s->left_samples_available = 0x5F5F;
        }

        mb_type = MB_TYPE_INTRA4x4;
    } else {            /* INTRA16x16 */
        dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
        dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;  // remap to SVQ3's mode order

        if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
                                                                     s->left_samples_available, dir, 0)) < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
            return s->intra16x16_pred_mode;
        }

        cbp     = ff_h264_i_mb_type_info[mb_type - 8].cbp;
        mb_type = MB_TYPE_INTRA16x16;
    }

    /* intra MBs in P/B frames invalidate their motion vectors */
    if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
        for (i = 0; i < 4; i++)
            memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
                   0, 4 * 2 * sizeof(int16_t));
        if (s->pict_type == AV_PICTURE_TYPE_B) {
            for (i = 0; i < 4; i++)
                memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
                       0, 4 * 2 * sizeof(int16_t));
        }
    }
    if (!IS_INTRA4x4(mb_type)) {
        memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
    }
    if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
        memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
    }

    /* coded block pattern (intra16x16 carries it in the MB type) */
    if (!IS_INTRA16x16(mb_type) &&
        (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
        if ((vlc = get_interleaved_ue_golomb(&s->bc_slice)) >= 48) {
            av_log(s->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
            return -1;
        }

        cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
                                : ff_h264_golomb_to_inter_cbp[vlc];
    }
    if (IS_INTRA16x16(mb_type) ||
        (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
        s->qscale += get_interleaved_se_golomb(&s->bc_slice);

        if (s->qscale > 31u) {  // unsigned compare also catches negative values
            av_log(s->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
            return -1;
        }
    }
    if (IS_INTRA16x16(mb_type)) {
        AV_ZERO128(s->mb_luma_dc[0] + 0);
        AV_ZERO128(s->mb_luma_dc[0] + 8);
        if (svq3_decode_block(&s->bc_slice, s->mb_luma_dc[0], 0, 1)) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "error while decoding intra luma dc\n");
            return -1;
        }
    }

    if (cbp) {
        const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
        const int type  = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);

        /* luma residuals, one 8x8 quadrant per cbp bit */
        for (i = 0; i < 4; i++)
            if ((cbp & (1 << i))) {
                for (j = 0; j < 4; j++) {
                    k = index ? (1 * (j & 1) + 2 * (i & 1) +
                                 2 * (j & 2) + 4 * (i & 2))
                              : (4 * i + j);
                    s->non_zero_count_cache[scan8[k]] = 1;

                    if (svq3_decode_block(&s->bc_slice, &s->mb[16 * k], index, type)) {
                        av_log(s->avctx, AV_LOG_ERROR,
                               "error while decoding block\n");
                        return -1;
                    }
                }
            }

        /* chroma DC, then (if bit 0x20) chroma AC for both planes */
        if ((cbp & 0x30)) {
            for (i = 1; i < 3; ++i)
                if (svq3_decode_block(&s->bc_slice, &s->mb[16 * 16 * i], 0, 3)) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "error while decoding chroma dc block\n");
                    return -1;
                }

            if ((cbp & 0x20)) {
                for (i = 1; i < 3; i++) {
                    for (j = 0; j < 4; j++) {
                        k = 16 * i + j;
                        s->non_zero_count_cache[scan8[k]] = 1;

                        if (svq3_decode_block(&s->bc_slice, &s->mb[16 * k], 1, 1)) {
                            av_log(s->avctx, AV_LOG_ERROR,
                                   "error while decoding chroma ac block\n");
                            return -1;
                        }
                    }
                }
            }
        }
    }

    s->cbp                     = cbp;
    s->cur_pic->mb_type[mb_xy] = mb_type;

    if (IS_INTRA(mb_type))
        s->chroma_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
                                                            s->left_samples_available, DC_PRED8x8, 1);

    return 0;
}
/* Parse one SVQ3 slice header from the packet-level reader s->bc.
 * Validates the 8-bit header byte, copies the slice payload into
 * s->slice_buf (de-scrambling it when a watermark key is set), points
 * s->bc_slice at it, then reads the slice-level fields (picture type,
 * slice number, qscale, flags) and resets the intra predictors at the
 * slice boundary.  Returns 0 on success, -1 / AVERROR(...) on failure. */
1013 static int svq3_decode_slice_header(AVCodecContext *avctx)
1015     SVQ3Context *s = avctx->priv_data;
1016     const int mb_xy = s->mb_xy;
1017     int i, header;
1018     unsigned slice_id;
1020     header = bitstream_read(&s->bc, 8);
/* header & 0x9F must be slice-header type 1 or 2; bits 5-6 encode the
 * byte-width of the following slice-length field and must be non-zero. */
1022     if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1023         /* TODO: what? */
1024         av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1025         return -1;
1026     } else {
1027         int slice_bits, slice_bytes, slice_length;
/* length = byte-width (1..3) of the in-stream slice-length field */
1028         int length = header >> 5 & 3;
1030         slice_length = bitstream_peek(&s->bc, 8 * length);
1031         slice_bits = slice_length * 8;
1032         slice_bytes = slice_length + length - 1;
1034         bitstream_skip(&s->bc, 8);
1036         av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1037         if (!s->slice_buf)
1038             return AVERROR(ENOMEM);
/* Reject slices that claim more payload than the packet still holds. */
1040         if (slice_bytes * 8 > bitstream_bits_left(&s->bc)) {
1041             av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1042             return AVERROR_INVALIDDATA;
1044         memcpy(s->slice_buf, s->bc.buffer + bitstream_tell(&s->bc) / 8, slice_bytes);
/* NOTE(review): the XOR below goes through s->bc_slice.buffer, which still
 * points at the buffer set up for the PREVIOUS slice; the av_fast_malloc()
 * above may have reallocated s->slice_buf, so this looks like it should
 * operate on s->slice_buf directly -- verify against upstream fixes. */
1046         if (s->watermark_key) {
1047             uint32_t header = AV_RL32(&s->bc_slice.buffer[1]);
1048             AV_WL32(&s->bc_slice.buffer[1], header ^ s->watermark_key);
/* The in-stream length field was copied along with the payload; fold the
 * trailing (length - 1) bytes back over the start of the buffer. */
1050         if (length > 0) {
1051             memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1053         bitstream_skip(&s->bc, slice_bytes * 8);
1054         bitstream_init(&s->bc_slice, s->slice_buf, slice_bits);
/* Slice type: index 0..2 into the H.264 golomb -> picture-type table. */
1057     if ((slice_id = get_interleaved_ue_golomb(&s->bc_slice)) >= 3) {
1058         av_log(s->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
1059         return -1;
1062     s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
/* Header type 2 carries an explicit macroblock address of i bits (the
 * value is read but not used here); type 1 carries a single skipped bit. */
1064     if ((header & 0x9F) == 2) {
1065         i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1066         bitstream_read(&s->bc_slice, i);
1067     } else {
1068         bitstream_skip(&s->bc_slice, 1);
1071     s->slice_num = bitstream_read(&s->bc_slice, 8);
1072     s->qscale = bitstream_read(&s->bc_slice, 5);
1073     s->adaptive_quant = bitstream_read_bit(&s->bc_slice);
1075     /* unknown fields */
1076     bitstream_skip(&s->bc_slice, 1);
1078     if (s->unknown_flag)
1079         bitstream_skip(&s->bc_slice, 1);
1081     bitstream_skip(&s->bc_slice, 1);
1082     bitstream_skip(&s->bc_slice, 2);
/* Skip a variable-length run of flag-prefixed filler bytes. */
1084     while (bitstream_read_bit(&s->bc_slice))
1085         bitstream_skip(&s->bc_slice, 8);
1087     /* reset intra predictors and invalidate motion vector references */
1088     if (s->mb_x > 0) {
1089         memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1090                -1, 4 * sizeof(int8_t));
1091         memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1092                -1, 8 * sizeof(int8_t) * s->mb_x);
1094     if (s->mb_y > 0) {
1095         memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1096                -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1098         if (s->mb_x > 0)
1099             s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1102     return 0;
/* Precompute the 4x4 dequantization coefficients for every QP in 0..51.
 * Derived from the shared H.264 base tables: for each qp, the base value
 * (selected by qp%6 via ff_h264_quant_rem6) is scaled by 16 and shifted
 * left by qp/6 + 2 (via ff_h264_quant_div6).  The destination index
 * (x >> 2) | ((x << 2) & 0xF) transposes the 4x4 position. */
1105 static void init_dequant4_coeff_table(SVQ3Context *s)
1107     int q, x;
1108     const int max_qp = 51;
1110     for (q = 0; q < max_qp + 1; q++) {
1111         int shift = ff_h264_quant_div6[q] + 2;
1112         int idx = ff_h264_quant_rem6[q];
1113         for (x = 0; x < 16; x++)
1114             s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1115                 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
/* One-time decoder initialization: allocates the three picture slots
 * (cur/last/next), sets up the DSP contexts, locates and parses the
 * "SEQH" atom in the extradata (frame size, pel flags, low-delay flag,
 * optional zlib-compressed watermark), then sizes the macroblock
 * bookkeeping arrays.  Returns 0 on success or a negative AVERROR. */
1119 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1121     SVQ3Context *s = avctx->priv_data;
1122     int m, x, y;
1123     unsigned char *extradata;
1124     unsigned char *extradata_end;
1125     unsigned int size;
1126     int marker_found = 0;
1128     s->cur_pic  = av_mallocz(sizeof(*s->cur_pic));
1129     s->last_pic = av_mallocz(sizeof(*s->last_pic));
1130     s->next_pic = av_mallocz(sizeof(*s->next_pic));
1131     if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1132         av_freep(&s->cur_pic);
1133         av_freep(&s->last_pic);
1134         av_freep(&s->next_pic);
1135         return AVERROR(ENOMEM);
/* Frame allocation failures below rely on svq3_decode_end() for cleanup. */
1138     s->cur_pic->f  = av_frame_alloc();
1139     s->last_pic->f = av_frame_alloc();
1140     s->next_pic->f = av_frame_alloc();
1141     if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1142         return AVERROR(ENOMEM);
1144     ff_h264dsp_init(&s->h264dsp, 8, 1);
1145     ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1146     ff_videodsp_init(&s->vdsp, 8);
1148     ff_hpeldsp_init(&s->hdsp, avctx->flags);
1149     ff_tpeldsp_init(&s->tdsp);
1151     avctx->pix_fmt     = AV_PIX_FMT_YUVJ420P;
1152     avctx->color_range = AVCOL_RANGE_JPEG;
1154     s->avctx         = avctx;
/* Defaults used when no SEQH header is present in the extradata. */
1155     s->halfpel_flag  = 1;
1156     s->thirdpel_flag = 1;
1157     s->unknown_flag  = 0;
1159     /* prowl for the "SEQH" marker in the extradata */
1160     extradata     = (unsigned char *)avctx->extradata;
1161     extradata_end = avctx->extradata + avctx->extradata_size;
1162     if (extradata) {
/* m + 8 bound keeps the 4-byte tag plus 4-byte size readable at any hit. */
1163         for (m = 0; m + 8 < avctx->extradata_size; m++) {
1164             if (!memcmp(extradata, "SEQH", 4)) {
1165                 marker_found = 1;
1166                 break;
1168             extradata++;
1172     /* if a match was found, parse the extra data */
1173     if (marker_found) {
1174         BitstreamContext bc;
1175         int frame_size_code;
1177         size = AV_RB32(&extradata[4]);
1178         if (size > extradata_end - extradata - 8)
1179             return AVERROR_INVALIDDATA;
1180         bitstream_init8(&bc, extradata + 8, size);
1182         /* 'frame size code' and optional 'width, height' */
1183         frame_size_code = bitstream_read(&bc, 3);
/* Codes 0..6 select a fixed resolution; code 7 carries explicit 12-bit
 * width and height fields. */
1184         switch (frame_size_code) {
1185         case 0:
1186             avctx->width  = 160;
1187             avctx->height = 120;
1188             break;
1189         case 1:
1190             avctx->width  = 128;
1191             avctx->height = 96;
1192             break;
1193         case 2:
1194             avctx->width  = 176;
1195             avctx->height = 144;
1196             break;
1197         case 3:
1198             avctx->width  = 352;
1199             avctx->height = 288;
1200             break;
1201         case 4:
1202             avctx->width  = 704;
1203             avctx->height = 576;
1204             break;
1205         case 5:
1206             avctx->width  = 240;
1207             avctx->height = 180;
1208             break;
1209         case 6:
1210             avctx->width  = 320;
1211             avctx->height = 240;
1212             break;
1213         case 7:
1214             avctx->width  = bitstream_read(&bc, 12);
1215             avctx->height = bitstream_read(&bc, 12);
1216             break;
1219         s->halfpel_flag  = bitstream_read_bit(&bc);
1220         s->thirdpel_flag = bitstream_read_bit(&bc);
1222         /* unknown fields */
1223         bitstream_skip(&bc, 1);
1224         bitstream_skip(&bc, 1);
1225         bitstream_skip(&bc, 1);
1226         bitstream_skip(&bc, 1);
1228         s->low_delay = bitstream_read_bit(&bc);
1230         /* unknown field */
1231         bitstream_skip(&bc, 1);
/* Skip a variable-length run of flag-prefixed filler bytes. */
1233         while (bitstream_read_bit(&bc))
1234             bitstream_skip(&bc, 8);
1236         s->unknown_flag    = bitstream_read_bit(&bc);
1237         avctx->has_b_frames = !s->low_delay;
/* unknown_flag set means the stream carries a zlib-compressed watermark
 * logo whose checksum becomes the slice de-scrambling key. */
1238         if (s->unknown_flag) {
1239 #if CONFIG_ZLIB
1240             unsigned watermark_width  = get_interleaved_ue_golomb(&bc);
1241             unsigned watermark_height = get_interleaved_ue_golomb(&bc);
1242             int u1                    = get_interleaved_ue_golomb(&bc);
1243             int u2                    = bitstream_read(&bc, 8);
1244             int u3                    = bitstream_read(&bc, 2);
1245             int u4                    = get_interleaved_ue_golomb(&bc);
1246             unsigned long buf_len     = watermark_width *
1247                                         watermark_height * 4;
/* Round the current bit position up to the next byte boundary. */
1248             int offset                = bitstream_tell(&bc) + 7 >> 3;
1249             uint8_t *buf;
/* Overflow guard for watermark_width * height * 4. */
1251             if (watermark_height > 0 &&
1252                 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1253                 return -1;
/* NOTE(review): buf is not checked for NULL before uncompress() below;
 * also offset > size would make 'size - offset' wrap -- verify bounds. */
1255             buf = av_malloc(buf_len);
1256             av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1257                    watermark_width, watermark_height);
1258             av_log(avctx, AV_LOG_DEBUG,
1259                    "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1260                    u1, u2, u3, u4, offset);
1261             if (uncompress(buf, &buf_len, extradata + 8 + offset,
1262                            size - offset) != Z_OK) {
1263                 av_log(avctx, AV_LOG_ERROR,
1264                        "could not uncompress watermark logo\n");
1265                 av_free(buf);
1266                 return -1;
/* Key = 16-bit SVQ1 packet checksum of the logo, replicated into both
 * halves of a 32-bit word. */
1268             s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1269             s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1270             av_log(avctx, AV_LOG_DEBUG,
1271                    "watermark key %#"PRIx32"\n", s->watermark_key);
1272             av_free(buf);
1273 #else
1274             av_log(avctx, AV_LOG_ERROR,
1275                    "this svq3 file contains watermark which need zlib support compiled in\n");
1276             return -1;
1277 #endif
/* Macroblock geometry; mb_stride has one extra column of padding. */
1281     s->mb_width   = (avctx->width + 15) / 16;
1282     s->mb_height  = (avctx->height + 15) / 16;
1283     s->mb_stride  = s->mb_width + 1;
1284     s->mb_num     = s->mb_width * s->mb_height;
1285     s->b_stride   = 4 * s->mb_width;
1286     s->h_edge_pos = s->mb_width * 16;
1287     s->v_edge_pos = s->mb_height * 16;
1289     s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1290     if (!s->intra4x4_pred_mode)
1291         return AVERROR(ENOMEM);
1293     s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1294                              sizeof(*s->mb2br_xy));
1295     if (!s->mb2br_xy)
1296         return AVERROR(ENOMEM);
/* Map each mb_xy to its slot in the two-row intra4x4_pred_mode ring. */
1298     for (y = 0; y < s->mb_height; y++)
1299         for (x = 0; x < s->mb_width; x++) {
1300             const int mb_xy = x + y * s->mb_stride;
1302             s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1305     init_dequant4_coeff_table(s);
1307     return 0;
/* Release all per-picture side-data buffers (motion vectors, reference
 * indices, macroblock types) and unreference the frame's data planes.
 * The SVQ3Frame struct and its AVFrame wrapper are NOT freed here --
 * that happens in svq3_decode_end(). */
1310 static void free_picture(AVCodecContext *avctx, SVQ3Frame *pic)
1312     int i;
1313     for (i = 0; i < 2; i++) {
1314         av_buffer_unref(&pic->motion_val_buf[i]);
1315         av_buffer_unref(&pic->ref_index_buf[i]);
1317     av_buffer_unref(&pic->mb_type_buf);
1319     av_frame_unref(pic->f);
/* Prepare a picture for decoding: lazily allocate its side-data buffers
 * (mb types, motion vectors, reference indices), obtain frame data planes
 * via ff_get_buffer(), and lazily allocate the shared edge-emulation
 * scratch buffer.  Returns 0 on success or a negative AVERROR; on
 * ff_get_buffer failure the side-data buffers are released again. */
1322 static int get_buffer(AVCodecContext *avctx, SVQ3Frame *pic)
1324     SVQ3Context *s = avctx->priv_data;
1325     const int big_mb_num    = s->mb_stride * (s->mb_height + 1) + 1;
1326     const int mb_array_size = s->mb_stride * s->mb_height;
1327     const int b4_stride     = s->mb_width * 4 + 1;
1328     const int b4_array_size = b4_stride * s->mb_height * 4;
1329     int ret;
/* Side-data buffers are allocated once and reused across frames. */
1331     if (!pic->motion_val_buf[0]) {
1332         int i;
1334         pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1335         if (!pic->mb_type_buf)
1336             return AVERROR(ENOMEM);
/* Offset past the padding rows so mb_type[-1] style accesses stay valid. */
1337         pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1339         for (i = 0; i < 2; i++) {
1340             pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1341             pic->ref_index_buf[i]  = av_buffer_allocz(4 * mb_array_size);
1342             if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1343                 ret = AVERROR(ENOMEM);
1344                 goto fail;
1347             pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1348             pic->ref_index[i]  = pic->ref_index_buf[i]->data;
/* Reference pictures (non-B) are requested with the REF flag. */
1352     ret = ff_get_buffer(avctx, pic->f,
1353                         (s->pict_type != AV_PICTURE_TYPE_B) ?
1354                         AV_GET_BUFFER_FLAG_REF : 0);
1355     if (ret < 0)
1356         goto fail;
/* 17 lines: one 16-pixel MB row plus one line for interpolation overreads.
 * NOTE(review): this ENOMEM path returns directly instead of going through
 * 'fail:' like the other errors -- confirm the asymmetry is intentional. */
1358     if (!s->edge_emu_buffer) {
1359         s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1360         if (!s->edge_emu_buffer)
1361             return AVERROR(ENOMEM);
1364     return 0;
1365 fail:
1366     free_picture(avctx, pic);
1367     return ret;
/* Decode one SVQ3 frame from an AVPacket.  Handles the end-of-stream
 * flush (empty packet emits the buffered next_pic), parses the slice
 * header(s), manages the cur/last/next picture rotation, synthesizes grey
 * reference frames when references are missing, decodes every macroblock,
 * and selects which picture to output.  Returns the number of consumed
 * bytes, 0 on skip, or a negative error code. */
1370 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1371                              int *got_frame, AVPacket *avpkt)
1373     const uint8_t *buf = avpkt->data;
1374     SVQ3Context *s = avctx->priv_data;
1375     int buf_size = avpkt->size;
1376     int ret, m, i;
1378     /* special case for last picture */
/* Empty packet == flush: output the delayed next_pic exactly once. */
1379     if (buf_size == 0) {
1380         if (s->next_pic->f->data[0] && !s->low_delay && !s->last_frame_output) {
1381             ret = av_frame_ref(data, s->next_pic->f);
1382             if (ret < 0)
1383                 return ret;
1384             s->last_frame_output = 1;
1385             *got_frame          = 1;
1387         return 0;
1390     ret = bitstream_init8(&s->bc, buf, buf_size);
1391     if (ret < 0)
1392         return ret;
1394     s->mb_x = s->mb_y = s->mb_xy = 0;
1396     if (svq3_decode_slice_header(avctx))
1397         return -1;
/* Non-B frame: the previous "next" becomes the new backward reference. */
1399     if (s->pict_type != AV_PICTURE_TYPE_B)
1400         FFSWAP(SVQ3Frame*, s->next_pic, s->last_pic);
1402     av_frame_unref(s->cur_pic->f);
1404     /* for skipping the frame */
1405     s->cur_pic->f->pict_type = s->pict_type;
1406     s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1408     ret = get_buffer(avctx, s->cur_pic);
1409     if (ret < 0)
1410         return ret;
/* Per-block byte offsets into the picture planes, derived from the scan8
 * layout; entries 48+ use doubled row stride (8x8 transform layout). */
1412     for (i = 0; i < 16; i++) {
1413         s->block_offset[i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1414         s->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1416     for (i = 0; i < 16; i++) {
1417         s->block_offset[16 + i]      =
1418         s->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1419         s->block_offset[48 + 16 + i] =
1420         s->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
/* Inter frames need references; synthesize a grey frame when missing
 * (e.g. after a seek) so decoding can proceed. */
1423     if (s->pict_type != AV_PICTURE_TYPE_I) {
1424         if (!s->last_pic->f->data[0]) {
1425             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1426             ret = get_buffer(avctx, s->last_pic);
1427             if (ret < 0)
1428                 return ret;
1429             memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1430             memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1431                    s->last_pic->f->linesize[1]);
1432             memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1433                    s->last_pic->f->linesize[2]);
1436         if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1437             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1438             ret = get_buffer(avctx, s->next_pic);
1439             if (ret < 0)
1440                 return ret;
1441             memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1442             memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1443                    s->next_pic->f->linesize[1]);
1444             memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1445                    s->next_pic->f->linesize[2]);
1449     if (avctx->debug & FF_DEBUG_PICT_INFO)
1450         av_log(s->avctx, AV_LOG_DEBUG,
1451                "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1452                av_get_picture_type_char(s->pict_type),
1453                s->halfpel_flag, s->thirdpel_flag,
1454                s->adaptive_quant, s->qscale, s->slice_num);
/* Honor the caller's frame-skipping policy before doing any MB work. */
1456     if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1457         avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1458         avctx->skip_frame >= AVDISCARD_ALL)
1459         return 0;
1461     if (s->next_p_frame_damaged) {
1462         if (s->pict_type == AV_PICTURE_TYPE_B)
1463             return 0;
1464         else
1465             s->next_p_frame_damaged = 0;
/* Frame-number bookkeeping: slice_num acts as an 8-bit picture id, so
 * offsets wrap modulo 256. */
1468     if (s->pict_type == AV_PICTURE_TYPE_B) {
1469         s->frame_num_offset = s->slice_num - s->prev_frame_num;
1471         if (s->frame_num_offset < 0)
1472             s->frame_num_offset += 256;
1473         if (s->frame_num_offset == 0 ||
1474             s->frame_num_offset >= s->prev_frame_num_offset) {
1475             av_log(s->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1476             return -1;
1478     } else {
1479         s->prev_frame_num        = s->frame_num;
1480         s->frame_num             = s->slice_num;
1481         s->prev_frame_num_offset = s->frame_num - s->prev_frame_num;
1483         if (s->prev_frame_num_offset < 0)
1484             s->prev_frame_num_offset += 256;
/* Prime the reference cache: all positions available (1) except the
 * right edge of rows 0-2, marked PART_NOT_AVAILABLE. */
1487     for (m = 0; m < 2; m++) {
1488         int i;
1489         for (i = 0; i < 4; i++) {
1490             int j;
1491             for (j = -1; j < 4; j++)
1492                 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1493             if (i < 3)
1494                 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1498     for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1499         for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1500             unsigned mb_type;
1501             s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
/* Fewer than 8 bits left and only zero padding remaining: this slice is
 * exhausted, so parse the next slice header from the packet reader. */
1503             if ((bitstream_bits_left(&s->bc_slice)) <= 7) {
1504                 if (((bitstream_tell(&s->bc_slice) & 7) == 0 ||
1505                      bitstream_peek(&s->bc_slice, bitstream_bits_left(&s->bc_slice) & 7) == 0)) {
1507                     if (svq3_decode_slice_header(avctx))
1508                         return -1;
1510                     /* TODO: support s->mb_skip_run */
/* Remap the coded mb_type into the global numbering: I-frame types start
 * at 8, B-frame types >= 4 shift up by 4. */
1513             mb_type = get_interleaved_ue_golomb(&s->bc_slice);
1515             if (s->pict_type == AV_PICTURE_TYPE_I)
1516                 mb_type += 8;
1517             else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1518                 mb_type += 4;
1519             if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1520                 av_log(s->avctx, AV_LOG_ERROR,
1521                        "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1522                 return -1;
/* mb_type 0 is a skip MB: no reconstruction needed. */
1525             if (mb_type != 0)
1526                 hl_decode_mb(s);
1528             if (s->pict_type != AV_PICTURE_TYPE_B && !s->low_delay)
1529                 s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1530                     (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1533         ff_draw_horiz_band(avctx, s->cur_pic->f,
1534                            s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1535                            16 * s->mb_y, 16, PICT_FRAME, 0,
1536                            s->low_delay);
/* Output selection: B frames and low-delay streams emit the current
 * picture immediately; otherwise emit the (delayed) previous reference. */
1539     if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay)
1540         ret = av_frame_ref(data, s->cur_pic->f);
1541     else if (s->last_pic->f->data[0])
1542         ret = av_frame_ref(data, s->last_pic->f);
1543     if (ret < 0)
1544         return ret;
1546     /* Do not output the last pic after seeking. */
1547     if (s->last_pic->f->data[0] || s->low_delay)
1548         *got_frame = 1;
1550     if (s->pict_type != AV_PICTURE_TYPE_B) {
1551         FFSWAP(SVQ3Frame*, s->cur_pic, s->next_pic);
1552     } else {
1553         av_frame_unref(s->cur_pic->f);
1556     return buf_size;
/* Decoder teardown: release every per-picture buffer, free the three
 * picture structs and their AVFrames, and free all context-owned scratch
 * arrays (slice buffer, intra prediction modes, edge emulation buffer,
 * mb2br mapping).  Safe to call with partially-initialized state since
 * av_freep()/av_frame_free() tolerate NULL. */
1559 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1561     SVQ3Context *s = avctx->priv_data;
1563     free_picture(avctx, s->cur_pic);
1564     free_picture(avctx, s->next_pic);
1565     free_picture(avctx, s->last_pic);
1566     av_frame_free(&s->cur_pic->f);
1567     av_frame_free(&s->next_pic->f);
1568     av_frame_free(&s->last_pic->f);
1569     av_freep(&s->cur_pic);
1570     av_freep(&s->next_pic);
1571     av_freep(&s->last_pic);
1572     av_freep(&s->slice_buf);
1573     av_freep(&s->intra4x4_pred_mode);
1574     av_freep(&s->edge_emu_buffer);
1575     av_freep(&s->mb2br_xy);
1577     return 0;
1580 AVCodec ff_svq3_decoder = {
1581 .name = "svq3",
1582 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1583 .type = AVMEDIA_TYPE_VIDEO,
1584 .id = AV_CODEC_ID_SVQ3,
1585 .priv_data_size = sizeof(SVQ3Context),
1586 .init = svq3_decode_init,
1587 .close = svq3_decode_end,
1588 .decode = svq3_decode_frame,
1589 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1590 AV_CODEC_CAP_DR1 |
1591 AV_CODEC_CAP_DELAY,
1592 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1593 AV_PIX_FMT_NONE},