// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright 2016 Tom aan de Wiel
 * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *
 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
 *
 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
 * R.D. Brown, 1977
 */

#include <linux/string.h>
#include "vicodec-codec.h"

#define ALL_ZEROS 15
#define DEADZONE_WIDTH 20

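/*
 * Zigzag scan order: the index is the position in the scan, the value
 * is the offset of that coefficient within the 8x8 block. Scanning
 * diagonally groups the high-frequency coefficients, which are usually
 * zero after quantization, at the end of the scan so the run-length
 * encoder below can cut them off.
 */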
static const uint8_t zigzag[64] = {
	0,
	1, 8,
	2, 9, 16,
	3, 10, 17, 24,
	4, 11, 18, 25, 32,
	5, 12, 19, 26, 33, 40,
	6, 13, 20, 27, 34, 41, 48,
	7, 14, 21, 28, 35, 42, 49, 56,
	15, 22, 29, 36, 43, 50, 57,
	23, 30, 37, 44, 51, 58,
	31, 38, 45, 52, 59,
	39, 46, 53, 60,
	47, 54, 61,
	55, 62,
	63,
};

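/*
 * Run-length encode one 8x8 block of coefficients in zigzag order.
 * The output is a stream of 16-bit big-endian words: a header word
 * (PFRAME_BIT set for P-blocks), then words holding a zero-run length
 * in the low 4 bits and the following coefficient in the upper 12 bits.
 * A run value of ALL_ZEROS marks "all remaining coefficients are zero".
 * Returns the number of 16-bit words written.
 */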
static int rlc(const s16 *in, __be16 *output, int blocktype)
{
	s16 block[8 * 8];
	s16 *wp = block;
	int i = 0;
	int x, y;
	int ret = 0;

	/* read in block from framebuffer */
	int lastzero_run = 0;
	int to_encode;

	for (y = 0; y < 8; y++) {
		for (x = 0; x < 8; x++) {
			*wp = in[x + y * 8];
			wp++;
		}
	}

	/* keep track of the number of trailing zeros */
	for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
		lastzero_run++;

	*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
	ret++;

	to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);

	i = 0;
	while (i < to_encode) {
		int cnt = 0;
		int tmp;

		/* count leading zeros */
		while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
			cnt++;
			i++;
			if (i == to_encode) {
				cnt--;
				break;
			}
		}
		/* 4 bits for run, 12 for coefficient (quantization by 4) */
		*output++ = htons(cnt | tmp << 4);
		i++;
		ret++;
	}
	if (lastzero_run > 14) {
		*output = htons(ALL_ZEROS | 0);
		ret++;
	}

	return ret;
}

/*
 * This function will worst-case advance *rlc_in by 65 * 2 bytes:
 * one s16 value for the header and 8 * 8 coefficients of type s16.
 */
static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
{
	/* header */
	const __be16 *input = *rlc_in;
	s16 ret = ntohs(*input++);
	int dec_count = 0;
	s16 block[8 * 8 + 16];
	s16 *wp = block;
	int i;

	/*
	 * Now de-compress: each code word expands to up to 15 values
	 * (or fills the remainder of the 64 coefficients with zeroes if
	 * it is the last word to expand).
	 *
	 * So block has to hold 8 * 8 + 16 values; the '+ 16' is to
	 * allow for overflow if the incoming data was malformed.
	 */
	while (dec_count < 8 * 8) {
		s16 in = ntohs(*input++);
		int length = in & 0xf;
		int coeff = in >> 4;

		/* fill remainder with zeros */
		if (length == 15) {
			for (i = 0; i < 64 - dec_count; i++)
				*wp++ = 0;
			break;
		}

		for (i = 0; i < length; i++)
			*wp++ = 0;
		*wp++ = coeff;
		dec_count += length + 1;
	}

	wp = block;

	for (i = 0; i < 64; i++) {
		int pos = zigzag[i];
		int y = pos / 8;
		int x = pos % 8;

		dwht_out[x + y * 8] = *wp++;
	}
	*rlc_in = input;
	return ret;
}

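/*
 * Quantization tables: each entry is the number of bits a coefficient
 * at that position is shifted right, i.e. a division by a power of two,
 * so the higher frequencies are quantized more coarsely. The P-block
 * table quantizes harder since P-blocks only carry deltas.
 */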
static const int quant_table[] = {
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 3,
	2, 2, 2, 2, 2, 2, 3, 6,
	2, 2, 2, 2, 2, 3, 6, 6,
	2, 2, 2, 2, 3, 6, 6, 6,
	2, 2, 2, 3, 6, 6, 6, 6,
	2, 2, 3, 6, 6, 6, 6, 8,
};

static const int quant_table_p[] = {
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 6,
	3, 3, 3, 3, 3, 3, 6, 6,
	3, 3, 3, 3, 3, 6, 6, 9,
	3, 3, 3, 3, 6, 6, 9, 9,
	3, 3, 3, 6, 6, 9, 9, 10,
};

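/*
 * Quantize the coefficients in place and also produce the dequantized
 * copy (de_coeff) that the encoder uses to reconstruct its reference.
 * Coefficients that land inside [-DEADZONE_WIDTH, DEADZONE_WIDTH]
 * after shifting are forced to zero, which presumably lengthens the
 * zero runs seen by the run-length encoder.
 */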
static void quantize_intra(s16 *coeff, s16 *de_coeff)
{
	const int *quant = quant_table;
	int i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
			*coeff >>= *quant;
			if (*coeff >= -DEADZONE_WIDTH &&
			    *coeff <= DEADZONE_WIDTH)
				*coeff = *de_coeff = 0;
			else
				*de_coeff = *coeff << *quant;
		}
	}
}

static void dequantize_intra(s16 *coeff)
{
	const int *quant = quant_table;
	int i, j;

	for (j = 0; j < 8; j++)
		for (i = 0; i < 8; i++, quant++, coeff++)
			*coeff <<= *quant;
}

static void quantize_inter(s16 *coeff, s16 *de_coeff)
{
	const int *quant = quant_table_p;
	int i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
			*coeff >>= *quant;
			if (*coeff >= -DEADZONE_WIDTH &&
			    *coeff <= DEADZONE_WIDTH)
				*coeff = *de_coeff = 0;
			else
				*de_coeff = *coeff << *quant;
		}
	}
}

static void dequantize_inter(s16 *coeff)
{
	const int *quant = quant_table_p;
	int i, j;

	for (j = 0; j < 8; j++)
		for (i = 0; i < 8; i++, quant++, coeff++)
			*coeff <<= *quant;
}

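/*
 * Forward 8x8 Walsh-Hadamard transform: three butterfly stages over the
 * rows, then three over the columns. For intra blocks 'add' subtracts
 * 256 from each sum of two samples, centering the unsigned pixel values
 * around zero. input_step is the distance between horizontally
 * consecutive samples of the plane; note that the else branch below
 * assumes an input_step of 2, as used for interleaved chroma.
 */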
static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
		 unsigned int input_step, bool intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const u8 *tmp = block;
	s16 *out = output_block;
	int add = intra ? 256 : 0;
	unsigned int i;

	/* stage 1 */
	stride *= input_step;

	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		if (input_step == 1) {
			workspace1[0] = tmp[0] + tmp[1] - add;
			workspace1[1] = tmp[0] - tmp[1];

			workspace1[2] = tmp[2] + tmp[3] - add;
			workspace1[3] = tmp[2] - tmp[3];

			workspace1[4] = tmp[4] + tmp[5] - add;
			workspace1[5] = tmp[4] - tmp[5];

			workspace1[6] = tmp[6] + tmp[7] - add;
			workspace1[7] = tmp[6] - tmp[7];
		} else {
			workspace1[0] = tmp[0] + tmp[2] - add;
			workspace1[1] = tmp[0] - tmp[2];

			workspace1[2] = tmp[4] + tmp[6] - add;
			workspace1[3] = tmp[4] - tmp[6];

			workspace1[4] = tmp[8] + tmp[10] - add;
			workspace1[5] = tmp[8] - tmp[10];

			workspace1[6] = tmp[12] + tmp[14] - add;
			workspace1[7] = tmp[12] - tmp[14];
		}

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];

		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];

		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];

		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0 * 8] = workspace2[0] + workspace2[4];
		out[1 * 8] = workspace2[0] - workspace2[4];
		out[2 * 8] = workspace2[1] - workspace2[5];
		out[3 * 8] = workspace2[1] + workspace2[5];
		out[4 * 8] = workspace2[2] + workspace2[6];
		out[5 * 8] = workspace2[2] - workspace2[6];
		out[6 * 8] = workspace2[3] - workspace2[7];
		out[7 * 8] = workspace2[3] + workspace2[7];
	}
}

/*
 * Not the nicest way of doing it, but P-blocks get twice the range of
 * the I-blocks. Therefore we need a type bigger than 8 bits.
 * Furthermore the values can be negative... This is just a version that
 * works with 16-bit signed data.
 */
static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const s16 *tmp = block;
	s16 *out = output_block;
	int i;

	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		/* stage 1 */
		workspace1[0] = tmp[0] + tmp[1];
		workspace1[1] = tmp[0] - tmp[1];

		workspace1[2] = tmp[2] + tmp[3];
		workspace1[3] = tmp[2] - tmp[3];

		workspace1[4] = tmp[4] + tmp[5];
		workspace1[5] = tmp[4] - tmp[5];

		workspace1[6] = tmp[6] + tmp[7];
		workspace1[7] = tmp[6] - tmp[7];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];

		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];

		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];

		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0 * 8] = workspace2[0] + workspace2[4];
		out[1 * 8] = workspace2[0] - workspace2[4];
		out[2 * 8] = workspace2[1] - workspace2[5];
		out[3 * 8] = workspace2[1] + workspace2[5];
		out[4 * 8] = workspace2[2] + workspace2[6];
		out[5 * 8] = workspace2[2] - workspace2[6];
		out[6 * 8] = workspace2[3] - workspace2[7];
		out[7 * 8] = workspace2[3] + workspace2[7];
	}
}

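/*
 * Inverse 8x8 Walsh-Hadamard transform. The WHT is its own inverse up
 * to scale, so this runs the same butterflies and divides by 64 (>> 6)
 * at the end. For intra blocks the final += 128 restores the offset
 * that fwht() removed; inter blocks stay centered around zero since
 * they hold deltas.
 */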
static void ifwht(const s16 *block, s16 *output_block, int intra)
{
	/*
	 * we'll need more than 8 bits for the transformed coefficients
	 * use native unit of cpu
	 */
	int workspace1[8], workspace2[8];
	int inter = intra ? 0 : 1;
	const s16 *tmp = block;
	s16 *out = output_block;
	int i;

	for (i = 0; i < 8; i++, tmp += 8, out += 8) {
		/* stage 1 */
		workspace1[0] = tmp[0] + tmp[1];
		workspace1[1] = tmp[0] - tmp[1];

		workspace1[2] = tmp[2] + tmp[3];
		workspace1[3] = tmp[2] - tmp[3];

		workspace1[4] = tmp[4] + tmp[5];
		workspace1[5] = tmp[4] - tmp[5];

		workspace1[6] = tmp[6] + tmp[7];
		workspace1[7] = tmp[6] - tmp[7];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];

		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];

		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];

		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		if (inter) {
			int d;

			out[0 * 8] = workspace2[0] + workspace2[4];
			out[1 * 8] = workspace2[0] - workspace2[4];
			out[2 * 8] = workspace2[1] - workspace2[5];
			out[3 * 8] = workspace2[1] + workspace2[5];
			out[4 * 8] = workspace2[2] + workspace2[6];
			out[5 * 8] = workspace2[2] - workspace2[6];
			out[6 * 8] = workspace2[3] - workspace2[7];
			out[7 * 8] = workspace2[3] + workspace2[7];

			for (d = 0; d < 8; d++)
				out[8 * d] >>= 6;
		} else {
			int d;

			out[0 * 8] = workspace2[0] + workspace2[4];
			out[1 * 8] = workspace2[0] - workspace2[4];
			out[2 * 8] = workspace2[1] - workspace2[5];
			out[3 * 8] = workspace2[1] + workspace2[5];
			out[4 * 8] = workspace2[2] + workspace2[6];
			out[5 * 8] = workspace2[2] - workspace2[6];
			out[6 * 8] = workspace2[3] - workspace2[7];
			out[7 * 8] = workspace2[3] + workspace2[7];

			for (d = 0; d < 8; d++) {
				out[8 * d] >>= 6;
				out[8 * d] += 128;
			}
		}
	}
}

static void fill_encoder_block(const u8 *input, s16 *dst,
			       unsigned int stride, unsigned int input_step)
{
	int i, j;

	for (i = 0; i < 8; i++) {
		for (j = 0; j < 8; j++, input += input_step)
			*dst++ = *input;
		input += (stride - 8) * input_step;
	}
}

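/*
 * The "variances" below are really sums of absolute differences:
 * var_intra() is the SAD of a block against its own mean and
 * var_inter() is the SAD between the current and the reference block.
 * Whichever is smaller decides the block type in decide_blocktype().
 */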
static int var_intra(const s16 *input)
{
	int32_t mean = 0;
	int32_t ret = 0;
	const s16 *tmp = input;
	int i;

	for (i = 0; i < 8 * 8; i++, tmp++)
		mean += *tmp;
	mean /= 64;
	tmp = input;
	for (i = 0; i < 8 * 8; i++, tmp++)
		ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
	return ret;
}

static int var_inter(const s16 *old, const s16 *new)
{
	int32_t ret = 0;
	int i;

	for (i = 0; i < 8 * 8; i++, old++, new++)
		ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
	return ret;
}

static int decide_blocktype(const u8 *cur, const u8 *reference,
			    s16 *deltablock, unsigned int stride,
			    unsigned int input_step)
{
	s16 tmp[64];
	s16 old[64];
	s16 *work = tmp;
	unsigned int k, l;
	int vari;
	int vard;

	fill_encoder_block(cur, tmp, stride, input_step);
	fill_encoder_block(reference, old, 8, 1);
	vari = var_intra(tmp);

	for (k = 0; k < 8; k++) {
		for (l = 0; l < 8; l++) {
			*deltablock = *work - *reference;
			deltablock++;
			work++;
			reference++;
		}
	}
	deltablock -= 64;
	vard = var_inter(old, tmp);
	return vari <= vard ? IBLOCK : PBLOCK;
}

static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
{
	int i, j;

	for (i = 0; i < 8; i++) {
		for (j = 0; j < 8; j++)
			*dst++ = *input++;
		dst += stride - 8;
	}
}

static void add_deltas(s16 *deltas, const u8 *ref, int stride)
{
	int k, l;

	for (k = 0; k < 8; k++) {
		for (l = 0; l < 8; l++) {
			*deltas += *ref++;
			/*
			 * Due to quantizing, it is possible that the
			 * decoded coefficients are slightly out of range
			 */
			if (*deltas < 0)
				*deltas = 0;
			else if (*deltas > 255)
				*deltas = 255;
			deltas++;
		}
		ref += stride - 8;
	}
}

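/*
 * Encode a single plane. Note that the reference plane is stored as
 * consecutive 8x8 blocks rather than in raster order, which is why
 * refp simply advances by 64 per block. A block identical to its
 * predecessor is folded into a repeat count kept in the DUPS_MASK bits
 * of the previous block header. If the compressed data would grow past
 * rlco_max, the plane is emitted uncompressed (FRAME_UNENCODED).
 */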
static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
			struct cframe *cf, u32 height, u32 width,
			unsigned int input_step,
			bool is_intra, bool next_is_intra)
{
	u8 *input_start = input;
	__be16 *rlco_start = *rlco;
	s16 deltablock[64];
	__be16 pframe_bit = htons(PFRAME_BIT);
	u32 encoding = 0;
	unsigned int last_size = 0;
	unsigned int i, j;

	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			/* intra code, first frame is always intra coded. */
			int blocktype = IBLOCK;
			unsigned int size;

			if (!is_intra)
				blocktype = decide_blocktype(input, refp,
					deltablock, width, input_step);
			if (is_intra || blocktype == IBLOCK) {
				fwht(input, cf->coeffs, width, input_step, 1);
				quantize_intra(cf->coeffs, cf->de_coeffs);
				blocktype = IBLOCK;
			} else {
				/* inter code */
				encoding |= FRAME_PCODED;
				fwht16(deltablock, cf->coeffs, 8, 0);
				quantize_inter(cf->coeffs, cf->de_coeffs);
			}
			if (!next_is_intra) {
				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);

				if (blocktype == PBLOCK)
					add_deltas(cf->de_fwht, refp, 8);
				fill_decoder_block(refp, cf->de_fwht, 8);
			}

			input += 8 * input_step;
			refp += 8 * 8;

			if (encoding & FRAME_UNENCODED)
				continue;

			size = rlc(cf->coeffs, *rlco, blocktype);
			if (last_size == size &&
			    !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
				__be16 *last_rlco = *rlco - size;
				s16 hdr = ntohs(*last_rlco);

				if (!((*last_rlco ^ **rlco) & pframe_bit) &&
				    (hdr & DUPS_MASK) < DUPS_MASK)
					*last_rlco = htons(hdr + 2);
				else
					*rlco += size;
			} else {
				*rlco += size;
			}
			if (*rlco >= rlco_max)
				encoding |= FRAME_UNENCODED;
			last_size = size;
		}
		input += width * 7 * input_step;
	}

	if (encoding & FRAME_UNENCODED) {
		u8 *out = (u8 *)rlco_start;

		input = input_start;

		/*
		 * The compressed stream should never contain the magic
		 * header, so when we copy the YUV data we replace 0xff
		 * by 0xfe. Since YUV is limited range such values
		 * shouldn't appear anyway.
		 */
		for (i = 0; i < height * width; i++, input += input_step)
			*out++ = (*input == 0xff) ? 0xfe : *input;
		*rlco = (__be16 *)out;
	}
	return encoding;
}

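/*
 * Encode a complete frame: luma first, then the two chroma planes at
 * half resolution. Each plane gets a budget of one byte per pixel
 * (size / 2 16-bit words for luma, size / 8 for each chroma plane)
 * minus a safety margin, so an overflowing plane can still fall back
 * to its raw data. The per-plane FRAME_UNENCODED result is remapped to
 * LUMA/CB/CR_UNENCODED in the returned encoding mask.
 */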
u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
		 struct cframe *cf, bool is_intra, bool next_is_intra)
{
	unsigned int size = frm->height * frm->width;
	__be16 *rlco = cf->rlc_data;
	__be16 *rlco_max;
	u32 encoding;

	rlco_max = rlco + size / 2 - 256;
	encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
				frm->height, frm->width,
				1, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= LUMA_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	rlco_max = rlco + size / 8 - 256;
	encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
				 frm->height / 2, frm->width / 2,
				 frm->chroma_step, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= CB_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	rlco_max = rlco + size / 8 - 256;
	encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
				 frm->height / 2, frm->width / 2,
				 frm->chroma_step, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= CR_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
	return encoding;
}

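/*
 * Decode a single plane into the reference frame. An uncompressed
 * plane is copied straight in; otherwise every 8x8 block is run-length
 * decoded, dequantized and inverse transformed, and repeated blocks
 * are replayed from the 'copy' cache.
 */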
static void decode_plane(struct cframe *cf, const __be16 **rlco, u8 *ref,
			 u32 height, u32 width, bool uncompressed)
{
	unsigned int copies = 0;
	s16 copy[8 * 8];
	s16 stat;
	unsigned int i, j;

	if (uncompressed) {
		memcpy(ref, *rlco, width * height);
		*rlco += width * height / 2;
		return;
	}

	/*
	 * When decoding each macroblock the rlco pointer will be increased
	 * by 65 * 2 bytes worst-case.
	 * To avoid overflow the buffer has to be 65/64th of the actual raw
	 * image size, just in case someone feeds it malicious data.
	 */
	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			u8 *refp = ref + j * 8 * width + i * 8;

			if (copies) {
				memcpy(cf->de_fwht, copy, sizeof(copy));
				if (stat & PFRAME_BIT)
					add_deltas(cf->de_fwht, refp, width);
				fill_decoder_block(refp, cf->de_fwht, width);
				copies--;
				continue;
			}

			stat = derlc(rlco, cf->coeffs);

			if (stat & PFRAME_BIT)
				dequantize_inter(cf->coeffs);
			else
				dequantize_intra(cf->coeffs);

			ifwht(cf->coeffs, cf->de_fwht,
			      (stat & PFRAME_BIT) ? 0 : 1);

			copies = (stat & DUPS_MASK) >> 1;
			if (copies)
				memcpy(copy, cf->de_fwht, sizeof(copy));
			if (stat & PFRAME_BIT)
				add_deltas(cf->de_fwht, refp, width);
			fill_decoder_block(refp, cf->de_fwht, width);
		}
	}
}

void decode_frame(struct cframe *cf, struct raw_frame *ref, u32 hdr_flags)
{
	const __be16 *rlco = cf->rlc_data;

	decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
		     hdr_flags & VICODEC_FL_LUMA_IS_UNCOMPRESSED);
	decode_plane(cf, &rlco, ref->cb, cf->height / 2, cf->width / 2,
		     hdr_flags & VICODEC_FL_CB_IS_UNCOMPRESSED);
	decode_plane(cf, &rlco, ref->cr, cf->height / 2, cf->width / 2,
		     hdr_flags & VICODEC_FL_CR_IS_UNCOMPRESSED);
}