// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright 2016 Tom aan de Wiel
 * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *
 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
 *
 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
 * R.D. Brown, 1977
 */

#include <linux/string.h>
#include "vicodec-codec.h"
#define ALL_ZEROS 15
#define DEADZONE_WIDTH 20
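/*
 * ALL_ZEROS is the run-length code (0xf in the low nibble of a code word)
 * that marks "every remaining coefficient in this block is zero".
 * DEADZONE_WIDTH is the magnitude threshold, applied after the
 * quantization shift, below which a coefficient is clamped to zero so
 * that zero runs get longer.
 */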
static const uint8_t zigzag[64] = {
	0,
	1, 8,
	2, 9, 16,
	3, 10, 17, 24,
	4, 11, 18, 25, 32,
	5, 12, 19, 26, 33, 40,
	6, 13, 20, 27, 34, 41, 48,
	7, 14, 21, 28, 35, 42, 49, 56,
	15, 22, 29, 36, 43, 50, 57,
	23, 30, 37, 44, 51, 58,
	31, 38, 45, 52, 59,
	39, 46, 53, 60,
	47, 54, 61,
	55, 62,
	63,
};
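/*
 * The zigzag table maps a position in scan order to a position in the
 * 8x8 block, walking the anti-diagonals so that low-sequency (roughly,
 * low-frequency) coefficients come first. After quantization the later
 * positions are mostly zero, so zeros cluster at the end of the scan
 * and compress well under the run-length coding below.
 */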
static int rlc(const s16 *in, __be16 *output, int blocktype)
{
	s16 block[8 * 8];
	s16 *wp = block;
	int i = 0;
	int x, y;
	int ret = 0;
	int lastzero_run = 0;
	int to_encode;

	/* read in the block from the framebuffer */
	for (y = 0; y < 8; y++) {
		for (x = 0; x < 8; x++) {
			*wp = in[x + y * 8];
			wp++;
		}
	}

	/* keep track of the number of trailing zeros */
	for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
		lastzero_run++;

	*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
	ret++;

	to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);

	i = 0;
	while (i < to_encode) {
		int cnt = 0;
		int tmp;

		/* count leading zeros */
		while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
			cnt++;
			i++;
			if (i == to_encode) {
				cnt--;
				break;
			}
		}
		/* 4 bits for run, 12 for coefficient (quantization by 4) */
		*output++ = htons(cnt | tmp << 4);
		i++;
		ret++;
	}
	if (lastzero_run > 14) {
		*output = htons(ALL_ZEROS | 0);
		ret++;
	}

	return ret;
}
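/*
 * Worked example of the packing above: a run of three zeros followed by
 * the coefficient 25 is emitted as the single 16-bit word
 * htons(3 | (25 << 4)), i.e. the run length in bits 0-3 and the (already
 * quantized) coefficient in bits 4-15.
 */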
/*
 * This function will worst-case increase rlc_in by 65*2 bytes:
 * one s16 value for the header and 8 * 8 coefficients of type s16.
 */
static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
{
	/* header */
	const __be16 *input = *rlc_in;
	s16 ret = ntohs(*input++);
	int dec_count = 0;
	s16 block[8 * 8 + 16];
	s16 *wp = block;
	int i;

	/*
	 * Now de-compress: each code word expands to up to 15 coefficients
	 * (or fills the remainder of the 64 coefficients with zeroes if it
	 * is the last word to expand).
	 *
	 * So block has to hold 8 * 8 + 16 values, the '+ 16' is to
	 * allow for overflow if the incoming data was malformed.
	 */
	while (dec_count < 8 * 8) {
		s16 in = ntohs(*input++);
		int length = in & 0xf;
		int coeff = in >> 4;

		/* fill the remainder with zeros */
		if (length == 15) {
			for (i = 0; i < 64 - dec_count; i++)
				*wp++ = 0;
			break;
		}

		for (i = 0; i < length; i++)
			*wp++ = 0;
		*wp++ = coeff;
		dec_count += length + 1;
	}

	wp = block;

	for (i = 0; i < 64; i++) {
		int pos = zigzag[i];
		int y = pos / 8;
		int x = pos % 8;

		dwht_out[x + y * 8] = *wp++;
	}
	*rlc_in = input;
	return ret;
}
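/*
 * A minimal sketch of how rlc() and derlc() pair up (hypothetical
 * buffers and names, for illustration only):
 *
 *	__be16 stream[65];
 *	s16 out[64];
 *	const __be16 *p = stream;
 *	int words = rlc(coeffs, stream, IBLOCK);
 *	s16 hdr = derlc(&p, out);
 *
 * Afterwards p == stream + words and out[] holds the de-zigzagged
 * coefficients in raster order; hdr carries PFRAME_BIT (and, in a full
 * stream, the duplicate count that encode_plane() adds later).
 */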
static const int quant_table[] = {
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 3,
	2, 2, 2, 2, 2, 2, 3, 6,
	2, 2, 2, 2, 2, 3, 6, 6,
	2, 2, 2, 2, 3, 6, 6, 6,
	2, 2, 2, 3, 6, 6, 6, 6,
	2, 2, 3, 6, 6, 6, 6, 8,
};
static const int quant_table_p[] = {
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 6,
	3, 3, 3, 3, 3, 3, 6, 6,
	3, 3, 3, 3, 3, 6, 6, 9,
	3, 3, 3, 3, 6, 6, 9, 9,
	3, 3, 3, 6, 6, 9, 9, 10,
};
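/*
 * Both tables hold shift amounts, not divisors: quantization is a right
 * shift and dequantization the matching left shift. The shifts grow
 * toward the bottom-right (high-sequency) corner, and the P-frame table
 * starts one bit coarser, which compensates for the doubled range of
 * inter deltas noted in the comment above fwht16() below.
 */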
static void quantize_intra(s16 *coeff, s16 *de_coeff)
{
	const int *quant = quant_table;
	int i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
			*coeff >>= *quant;
			if (*coeff >= -DEADZONE_WIDTH &&
			    *coeff <= DEADZONE_WIDTH)
				*coeff = *de_coeff = 0;
			else
				*de_coeff = *coeff << *quant;
		}
	}
}
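/*
 * Note that the deadzone test runs on the already-shifted value, so the
 * effective zeroing threshold in coefficient units is roughly
 * DEADZONE_WIDTH << *quant. Writing both coeff (for the bitstream) and
 * de_coeff (for reconstruction) keeps the encoder's reference frame
 * identical to what the decoder will reconstruct.
 */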
static void dequantize_intra(s16 *coeff)
{
	const int *quant = quant_table;
	int i, j;

	for (j = 0; j < 8; j++)
		for (i = 0; i < 8; i++, quant++, coeff++)
			*coeff <<= *quant;
}
static void quantize_inter(s16 *coeff, s16 *de_coeff)
{
	const int *quant = quant_table_p;
	int i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
			*coeff >>= *quant;
			if (*coeff >= -DEADZONE_WIDTH &&
			    *coeff <= DEADZONE_WIDTH)
				*coeff = *de_coeff = 0;
			else
				*de_coeff = *coeff << *quant;
		}
	}
}
static void dequantize_inter(s16 *coeff)
{
	const int *quant = quant_table_p;
	int i, j;

	for (j = 0; j < 8; j++)
		for (i = 0; i < 8; i++, quant++, coeff++)
			*coeff <<= *quant;
}
static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
		 unsigned int input_step, bool intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const u8 *tmp = block;
	s16 *out = output_block;
	int add = intra ? 256 : 0;
	unsigned int i;

	/* stage 1 */
	stride *= input_step;

	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		if (input_step == 1) {
			workspace1[0] = tmp[0] + tmp[1] - add;
			workspace1[1] = tmp[0] - tmp[1];

			workspace1[2] = tmp[2] + tmp[3] - add;
			workspace1[3] = tmp[2] - tmp[3];

			workspace1[4] = tmp[4] + tmp[5] - add;
			workspace1[5] = tmp[4] - tmp[5];

			workspace1[6] = tmp[6] + tmp[7] - add;
			workspace1[7] = tmp[6] - tmp[7];
		} else {
			workspace1[0] = tmp[0] + tmp[2] - add;
			workspace1[1] = tmp[0] - tmp[2];

			workspace1[2] = tmp[4] + tmp[6] - add;
			workspace1[3] = tmp[4] - tmp[6];

			workspace1[4] = tmp[8] + tmp[10] - add;
			workspace1[5] = tmp[8] - tmp[10];

			workspace1[6] = tmp[12] + tmp[14] - add;
			workspace1[7] = tmp[12] - tmp[14];
		}

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];

		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];

		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];

		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0 * 8] = workspace2[0] + workspace2[4];
		out[1 * 8] = workspace2[0] - workspace2[4];
		out[2 * 8] = workspace2[1] - workspace2[5];
		out[3 * 8] = workspace2[1] + workspace2[5];
		out[4 * 8] = workspace2[2] + workspace2[6];
		out[5 * 8] = workspace2[2] - workspace2[6];
		out[6 * 8] = workspace2[3] - workspace2[7];
		out[7 * 8] = workspace2[3] + workspace2[7];
	}
}
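/*
 * The three butterfly stages above implement the 8-point WHT: log2(8)
 * stages of pairwise sums and differences, ordered so the outputs come
 * out in sequency order. Subtracting 'add' (256 = 2 * 128) from each
 * pairwise sum in stage 1 is equivalent to centering every intra sample
 * around zero before transforming; the differences are offset-invariant,
 * so they need no correction.
 */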
/*
 * Not the nicest way of doing it, but P-blocks get twice the range of
 * the I-blocks. Therefore we need a type bigger than 8 bits.
 * Furthermore the values can be negative... This is just a version that
 * works with 16-bit signed data.
 */
static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const s16 *tmp = block;
	s16 *out = output_block;
	int i;

	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		/* stage 1 */
		workspace1[0] = tmp[0] + tmp[1];
		workspace1[1] = tmp[0] - tmp[1];

		workspace1[2] = tmp[2] + tmp[3];
		workspace1[3] = tmp[2] - tmp[3];

		workspace1[4] = tmp[4] + tmp[5];
		workspace1[5] = tmp[4] - tmp[5];

		workspace1[6] = tmp[6] + tmp[7];
		workspace1[7] = tmp[6] - tmp[7];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];

		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];

		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];

		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0 * 8] = workspace2[0] + workspace2[4];
		out[1 * 8] = workspace2[0] - workspace2[4];
		out[2 * 8] = workspace2[1] - workspace2[5];
		out[3 * 8] = workspace2[1] + workspace2[5];
		out[4 * 8] = workspace2[2] + workspace2[6];
		out[5 * 8] = workspace2[2] - workspace2[6];
		out[6 * 8] = workspace2[3] - workspace2[7];
		out[7 * 8] = workspace2[3] + workspace2[7];
	}
}
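/*
 * fwht16() is structurally identical to fwht() but reads s16 deltas
 * instead of u8 samples and never applies the intra centering offset
 * (its 'intra' argument is unused and only mirrors fwht's signature).
 */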
static void ifwht(const s16 *block, s16 *output_block, int intra)
{
	/*
	 * we'll need more than 8 bits for the transformed coefficients,
	 * use the CPU's native integer width
	 */
	int workspace1[8], workspace2[8];
	int inter = intra ? 0 : 1;
	const s16 *tmp = block;
	s16 *out = output_block;
	int i;

	for (i = 0; i < 8; i++, tmp += 8, out += 8) {
		/* stage 1 */
		workspace1[0] = tmp[0] + tmp[1];
		workspace1[1] = tmp[0] - tmp[1];

		workspace1[2] = tmp[2] + tmp[3];
		workspace1[3] = tmp[2] - tmp[3];

		workspace1[4] = tmp[4] + tmp[5];
		workspace1[5] = tmp[4] - tmp[5];

		workspace1[6] = tmp[6] + tmp[7];
		workspace1[7] = tmp[6] - tmp[7];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];

		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];

		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];

		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		if (inter) {
			int d;

			out[0 * 8] = workspace2[0] + workspace2[4];
			out[1 * 8] = workspace2[0] - workspace2[4];
			out[2 * 8] = workspace2[1] - workspace2[5];
			out[3 * 8] = workspace2[1] + workspace2[5];
			out[4 * 8] = workspace2[2] + workspace2[6];
			out[5 * 8] = workspace2[2] - workspace2[6];
			out[6 * 8] = workspace2[3] - workspace2[7];
			out[7 * 8] = workspace2[3] + workspace2[7];

			for (d = 0; d < 8; d++)
				out[8 * d] >>= 6;
		} else {
			int d;

			out[0 * 8] = workspace2[0] + workspace2[4];
			out[1 * 8] = workspace2[0] - workspace2[4];
			out[2 * 8] = workspace2[1] - workspace2[5];
			out[3 * 8] = workspace2[1] + workspace2[5];
			out[4 * 8] = workspace2[2] + workspace2[6];
			out[5 * 8] = workspace2[2] - workspace2[6];
			out[6 * 8] = workspace2[3] - workspace2[7];
			out[7 * 8] = workspace2[3] + workspace2[7];

			for (d = 0; d < 8; d++) {
				out[8 * d] >>= 6;
				out[8 * d] += 128;
			}
		}
	}
}
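/*
 * The final '>>= 6' divides by 64: the unnormalized forward and inverse
 * transforms over rows and columns together scale each sample by
 * 8 * 8 = 64, so the inverse shifts right by 6 to compensate. Intra
 * blocks additionally get 128 added back, undoing the centering that
 * fwht() applied on the encoder side.
 */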
static void fill_encoder_block(const u8 *input, s16 *dst,
			       unsigned int stride, unsigned int input_step)
{
	int i, j;

	for (i = 0; i < 8; i++) {
		for (j = 0; j < 8; j++, input += input_step)
			*dst++ = *input;
		input += (stride - 8) * input_step;
	}
}
static int var_intra(const s16 *input)
{
	int32_t mean = 0;
	int32_t ret = 0;
	const s16 *tmp = input;
	int i;

	for (i = 0; i < 8 * 8; i++, tmp++)
		mean += *tmp;
	mean /= 64;
	tmp = input;
	for (i = 0; i < 8 * 8; i++, tmp++)
		ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
	return ret;
}
static int var_inter(const s16 *old, const s16 *new)
{
	int32_t ret = 0;
	int i;

	for (i = 0; i < 8 * 8; i++, old++, new++)
		ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
	return ret;
}
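/*
 * Both "variance" helpers are really sums of absolute differences: SAD
 * against the block mean for intra, SAD against the reference block for
 * inter. That keeps the block-type decision below cheap, with no
 * multiplies.
 */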
static int decide_blocktype(const u8 *cur, const u8 *reference,
			    s16 *deltablock, unsigned int stride,
			    unsigned int input_step)
{
	s16 tmp[64];
	s16 old[64];
	s16 *work = tmp;
	unsigned int k, l;
	int vari;
	int vard;

	fill_encoder_block(cur, tmp, stride, input_step);
	fill_encoder_block(reference, old, 8, 1);
	vari = var_intra(tmp);

	for (k = 0; k < 8; k++) {
		for (l = 0; l < 8; l++) {
			*deltablock = *work - *reference;
			deltablock++;
			work++;
			reference++;
		}
	}
	deltablock -= 64;
	vard = var_inter(old, tmp);
	return vari <= vard ? IBLOCK : PBLOCK;
}
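/*
 * A tie (vari == vard) goes to IBLOCK, so a block is only inter coded
 * when predicting from the reference is strictly cheaper than coding it
 * standalone. The delta block is always computed, and simply goes
 * unused when the caller picks IBLOCK.
 */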
static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
{
	int i, j;

	for (i = 0; i < 8; i++) {
		for (j = 0; j < 8; j++, input++, dst++) {
			if (*input < 0)
				*dst = 0;
			else if (*input > 255)
				*dst = 255;
			else
				*dst = *input;
		}
		dst += stride - 8;
	}
}
static void add_deltas(s16 *deltas, const u8 *ref, int stride)
{
	int k, l;

	for (k = 0; k < 8; k++) {
		for (l = 0; l < 8; l++) {
			*deltas += *ref++;
			/*
			 * Due to quantizing, it is possible that the
			 * decoded coefficients are slightly out of range
			 */
			if (*deltas < 0)
				*deltas = 0;
			else if (*deltas > 255)
				*deltas = 255;
			deltas++;
		}
		ref += stride - 8;
	}
}
static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
			struct cframe *cf, u32 height, u32 width,
			unsigned int input_step,
			bool is_intra, bool next_is_intra)
{
	u8 *input_start = input;
	__be16 *rlco_start = *rlco;
	s16 deltablock[64];
	__be16 pframe_bit = htons(PFRAME_BIT);
	u32 encoding = 0;
	unsigned int last_size = 0;
	unsigned int i, j;

	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			/* intra code, first frame is always intra coded. */
			int blocktype = IBLOCK;
			unsigned int size;

			if (!is_intra)
				blocktype = decide_blocktype(input, refp,
					deltablock, width, input_step);
			if (is_intra || blocktype == IBLOCK) {
				fwht(input, cf->coeffs, width, input_step, 1);
				quantize_intra(cf->coeffs, cf->de_coeffs);
				blocktype = IBLOCK;
			} else {
				/* inter code */
				encoding |= FRAME_PCODED;
				fwht16(deltablock, cf->coeffs, 8, 0);
				quantize_inter(cf->coeffs, cf->de_coeffs);
			}
			if (!next_is_intra) {
				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);

				if (blocktype == PBLOCK)
					add_deltas(cf->de_fwht, refp, 8);
				fill_decoder_block(refp, cf->de_fwht, 8);
			}

			input += 8 * input_step;
			refp += 8 * 8;

			if (encoding & FRAME_UNENCODED)
				continue;

			size = rlc(cf->coeffs, *rlco, blocktype);
			if (last_size == size &&
			    !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
				__be16 *last_rlco = *rlco - size;
				s16 hdr = ntohs(*last_rlco);

				if (!((*last_rlco ^ **rlco) & pframe_bit) &&
				    (hdr & DUPS_MASK) < DUPS_MASK)
					*last_rlco = htons(hdr + 2);
				else
					*rlco += size;
			} else {
				*rlco += size;
			}
			if (*rlco >= rlco_max)
				encoding |= FRAME_UNENCODED;
			last_size = size;
		}
		input += width * 7 * input_step;
	}

	if (encoding & FRAME_UNENCODED) {
		u8 *out = (u8 *)rlco_start;

		input = input_start;
		/*
		 * The compressed stream should never contain the magic
		 * header, so when we copy the YUV data we replace 0xff
		 * by 0xfe. Since YUV is limited range such values
		 * shouldn't appear anyway.
		 */
		for (i = 0; i < height * width; i++, input += input_step)
			*out++ = (*input == 0xff) ? 0xfe : *input;
		*rlco = (__be16 *)out;
	}
	return encoding;
}
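/*
 * Two space savers interact above: an RLC block identical to its
 * predecessor is folded away by adding 2 to the previous block's header
 * word (the duplicate count lives above PFRAME_BIT, bounded by
 * DUPS_MASK), and if the stream still outgrows rlco_max the whole plane
 * falls back to raw storage (FRAME_UNENCODED) with 0xff bytes rewritten
 * to 0xfe.
 */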
u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
		 struct cframe *cf, bool is_intra, bool next_is_intra)
{
	unsigned int size = frm->height * frm->width;
	__be16 *rlco = cf->rlc_data;
	__be16 *rlco_max;
	u32 encoding;

	rlco_max = rlco + size / 2 - 256;
	encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
				frm->height, frm->width,
				1, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= LUMA_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	rlco_max = rlco + size / 8 - 256;
	encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
				 frm->height / 2, frm->width / 2,
				 frm->chroma_step, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= CB_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	rlco_max = rlco + size / 8 - 256;
	encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
				 frm->height / 2, frm->width / 2,
				 frm->chroma_step, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= CR_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
	return encoding;
}
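/*
 * A minimal usage sketch (hypothetical caller state; the real driver
 * keeps the frames and cframe in its context struct):
 *
 *	u32 flags = encode_frame(&cur, &ref, &cframe, is_key, next_is_key);
 *	if (flags & (LUMA_UNENCODED | CB_UNENCODED | CR_UNENCODED))
 *		; // record the per-plane "uncompressed" bits in the header
 *	// cframe.size is the number of bytes of rlc_data to transmit
 */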
static void decode_plane(struct cframe *cf, const __be16 **rlco, u8 *ref,
			 u32 height, u32 width, bool uncompressed)
{
	unsigned int copies = 0;
	s16 copy[8 * 8];
	s16 stat;
	unsigned int i, j;

	if (uncompressed) {
		memcpy(ref, *rlco, width * height);
		*rlco += width * height / 2;
		return;
	}

	/*
	 * When decoding each macroblock the rlco pointer will be increased
	 * by 65 * 2 bytes worst-case.
	 * To avoid overflow the buffer has to be 65/64th of the actual raw
	 * image size, just in case someone feeds it malicious data.
	 */
	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			u8 *refp = ref + j * 8 * width + i * 8;

			if (copies) {
				memcpy(cf->de_fwht, copy, sizeof(copy));
				if (stat & PFRAME_BIT)
					add_deltas(cf->de_fwht, refp, width);
				fill_decoder_block(refp, cf->de_fwht, width);
				copies--;
				continue;
			}

			stat = derlc(rlco, cf->coeffs);

			if (stat & PFRAME_BIT)
				dequantize_inter(cf->coeffs);
			else
				dequantize_intra(cf->coeffs);

			ifwht(cf->coeffs, cf->de_fwht,
			      (stat & PFRAME_BIT) ? 0 : 1);

			copies = (stat & DUPS_MASK) >> 1;
			if (copies)
				memcpy(copy, cf->de_fwht, sizeof(copy));
			if (stat & PFRAME_BIT)
				add_deltas(cf->de_fwht, refp, width);
			fill_decoder_block(refp, cf->de_fwht, width);
		}
	}
}
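/*
 * The 'copies' path above replays a cached block for each duplicate
 * announced in the header: (stat & DUPS_MASK) >> 1 repeats, each still
 * applying the P-frame deltas against its own reference position, so
 * duplicated bitstream blocks need not decode to identical pixels.
 */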
void decode_frame(struct cframe *cf, struct raw_frame *ref, u32 hdr_flags)
{
	const __be16 *rlco = cf->rlc_data;

	decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
		     hdr_flags & VICODEC_FL_LUMA_IS_UNCOMPRESSED);
	decode_plane(cf, &rlco, ref->cb, cf->height / 2, cf->width / 2,
		     hdr_flags & VICODEC_FL_CB_IS_UNCOMPRESSED);
	decode_plane(cf, &rlco, ref->cr, cf->height / 2, cf->width / 2,
		     hdr_flags & VICODEC_FL_CR_IS_UNCOMPRESSED);
}