aarch64: Add assembly support for -fsanitize=hwaddress tagged globals.
[libav.git] / libavcodec / pixlet.c
blob9f7d082fbed17b97db16d6cea08c45a87c7877b4
/*
 * Apple Pixlet decoder
 * Copyright (c) 2016 Paul B Mahol
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include <stdint.h>
24 #include "libavutil/imgutils.h"
25 #include "libavutil/intmath.h"
26 #include "libavutil/opt.h"
28 #include "avcodec.h"
29 #include "bitstream.h"
30 #include "bytestream.h"
31 #include "internal.h"
32 #include "thread.h"
33 #include "unary.h"
/* Only level count the decoder accepts; also sizes the per-level tables. */
#define NB_LEVELS    4

/* Marker that must precede every high-pass band payload. */
#define PIXLET_MAGIC 0xDEADBEEF

/* Second index of PixletContext.scaling[plane][..]: horizontal / vertical. */
#define H            0
#define V            1
/* Geometry of one sub-band inside a plane buffer (filled by init_decoder). */
typedef struct SubBand {
    size_t width;   /* samples per row of the band */
    size_t height;  /* number of rows of the band */
    size_t size;    /* width * height */
    size_t x;       /* column offset of the band within the plane */
    size_t y;       /* row offset of the band within the plane */
} SubBand;
48 typedef struct PixletContext {
49 AVClass *class;
51 GetByteContext gb;
52 BitstreamContext bc;
54 int levels;
55 int depth;
56 size_t w, h;
58 int16_t *filter[2];
59 int16_t *prediction;
60 int64_t scaling[4][2][NB_LEVELS];
61 SubBand band[4][NB_LEVELS * 3 + 1];
62 } PixletContext;
64 static av_cold int pixlet_init(AVCodecContext *avctx)
66 avctx->pix_fmt = AV_PIX_FMT_YUV420P16;
67 avctx->color_range = AVCOL_RANGE_JPEG;
68 return 0;
71 static av_cold int pixlet_close(AVCodecContext *avctx)
73 PixletContext *ctx = avctx->priv_data;
75 av_freep(&ctx->filter[0]);
76 av_freep(&ctx->filter[1]);
77 av_freep(&ctx->prediction);
79 return 0;
82 static int init_decoder(AVCodecContext *avctx)
84 PixletContext *ctx = avctx->priv_data;
85 int i, plane;
87 ctx->filter[0] = av_malloc_array(ctx->h, sizeof(int16_t));
88 ctx->filter[1] = av_malloc_array(FFMAX(ctx->h, ctx->w) + 16, sizeof(int16_t));
89 ctx->prediction = av_malloc_array((ctx->w >> NB_LEVELS), sizeof(int16_t));
90 if (!ctx->filter[0] || !ctx->filter[1] || !ctx->prediction)
91 return AVERROR(ENOMEM);
93 for (plane = 0; plane < 3; plane++) {
94 unsigned shift = plane > 0;
95 size_t w = ctx->w >> shift;
96 size_t h = ctx->h >> shift;
98 ctx->band[plane][0].width = w >> NB_LEVELS;
99 ctx->band[plane][0].height = h >> NB_LEVELS;
100 ctx->band[plane][0].size = (w >> NB_LEVELS) * (h >> NB_LEVELS);
102 for (i = 0; i < NB_LEVELS * 3; i++) {
103 unsigned scale = ctx->levels - (i / 3);
105 ctx->band[plane][i + 1].width = w >> scale;
106 ctx->band[plane][i + 1].height = h >> scale;
107 ctx->band[plane][i + 1].size = (w >> scale) * (h >> scale);
109 ctx->band[plane][i + 1].x = (w >> scale) * (((i + 1) % 3) != 2);
110 ctx->band[plane][i + 1].y = (h >> scale) * (((i + 1) % 3) != 1);
114 return 0;
117 static int read_low_coeffs(AVCodecContext *avctx, int16_t *dst, size_t size,
118 size_t width, ptrdiff_t stride)
120 PixletContext *ctx = avctx->priv_data;
121 BitstreamContext *bc = &ctx->bc;
122 unsigned cnt1, nbits, k, j = 0, i = 0;
123 int64_t value, state = 3;
124 int rlen, escape, flag = 0;
126 while (i < size) {
127 nbits = FFMIN(ff_clz((state >> 8) + 3) ^ 0x1F, 14);
129 cnt1 = get_unary(bc, 0, 8);
130 if (cnt1 < 8) {
131 value = bitstream_read(bc, nbits);
132 if (value <= 1) {
133 bitstream_unget(bc, value & 1, 1);
134 value = 1;
136 escape = value + ((1 << nbits) - 1) * cnt1 - 1;
137 } else {
138 escape = bitstream_read(bc, 16);
141 value = -((escape + flag) & 1) | 1;
142 dst[j++] = value * ((escape + flag + 1) >> 1);
143 i++;
144 if (j == width) {
145 j = 0;
146 dst += stride;
148 state = 120 * (escape + flag) + state - (120 * state >> 8);
149 flag = 0;
151 if (state * 4 > 0xFF || i >= size)
152 continue;
154 nbits = ((state + 8) >> 5) + (state ? ff_clz(state) : 32) - 24;
155 escape = av_mod_uintp2(16383, nbits);
156 cnt1 = get_unary(bc, 0, 8);
157 if (cnt1 > 7) {
158 rlen = bitstream_read(bc, 16);
159 } else {
160 value = bitstream_read(bc, nbits);
161 if (value <= 1) {
162 bitstream_unget(bc, value & 1, 1);
163 value = 1;
165 rlen = value + escape * cnt1 - 1;
168 if (i + rlen > size)
169 return AVERROR_INVALIDDATA;
170 i += rlen;
172 for (k = 0; k < rlen; k++) {
173 dst[j++] = 0;
174 if (j == width) {
175 j = 0;
176 dst += stride;
180 state = 0;
181 flag = rlen < 0xFFFF ? 1 : 0;
184 bitstream_align(bc);
185 return bitstream_tell(bc) >> 3;
188 static int read_high_coeffs(AVCodecContext *avctx, uint8_t *src, int16_t *dst,
189 int size, int64_t c, int a, int64_t d,
190 int width, ptrdiff_t stride)
192 PixletContext *ctx = avctx->priv_data;
193 BitstreamContext *bc = &ctx->bc;
194 unsigned cnt1, shbits, rlen, nbits, length, i = 0, j = 0, k;
195 int ret, escape, pfx, cthulu, yflag, xflag, flag = 0;
196 int64_t state = 3, value, tmp;
198 ret = bitstream_init8(bc, src, bytestream2_get_bytes_left(&ctx->gb));
199 if (ret < 0)
200 return ret;
202 cthulu = (a >= 0) + (a ^ (a >> 31)) - (a >> 31);
203 if (cthulu != 1) {
204 nbits = 33 - ff_clz(cthulu - 1);
205 if (nbits > 16)
206 return AVERROR_INVALIDDATA;
207 } else {
208 nbits = 1;
211 length = 25 - nbits;
213 while (i < size) {
214 if (state >> 8 != -3)
215 value = ff_clz((state >> 8) + 3) ^ 0x1F;
216 else
217 value = -1;
219 cnt1 = get_unary(bc, 0, length);
220 if (cnt1 >= length) {
221 cnt1 = bitstream_read(bc, nbits);
222 } else {
223 pfx = 14 + (((value - 14) >> 32) & (value - 14));
224 cnt1 *= (1 << pfx) - 1;
226 shbits = bitstream_read(bc, pfx);
227 if (shbits <= 1) {
228 bitstream_unget(bc, shbits & 1, 1);
229 shbits = 1;
231 cnt1 += shbits - 1;
234 xflag = flag + cnt1;
235 yflag = xflag;
237 if (flag + cnt1 == 0) {
238 value = 0;
239 } else {
240 xflag &= 1u;
241 tmp = c * ((yflag + 1) >> 1) + (c >> 1);
242 value = xflag + (tmp ^ -xflag);
245 i++;
246 dst[j++] = value;
247 if (j == width) {
248 j = 0;
249 dst += stride;
251 state += d * yflag - (d * state >> 8);
253 flag = 0;
255 if (state * 4 > 0xFF || i >= size)
256 continue;
258 pfx = ((state + 8) >> 5) + (state ? ff_clz(state) : 32) - 24;
259 escape = av_mod_uintp2(16383, pfx);
260 cnt1 = get_unary(bc, 0, 8);
261 if (cnt1 < 8) {
262 if (pfx < 1 || pfx > 25)
263 return AVERROR_INVALIDDATA;
265 value = bitstream_read(bc, pfx);
266 if (value <= 1) {
267 bitstream_unget(bc, value & 1, 1);
268 value = 1;
270 rlen = value + escape * cnt1 - 1;
271 } else {
272 if (bitstream_read_bit(bc))
273 value = bitstream_read(bc, 16);
274 else
275 value = bitstream_read(bc, 8);
277 rlen = value + 8 * escape;
280 if (rlen > 0xFFFF || i + rlen > size)
281 return AVERROR_INVALIDDATA;
282 i += rlen;
284 for (k = 0; k < rlen; k++) {
285 dst[j++] = 0;
286 if (j == width) {
287 j = 0;
288 dst += stride;
292 state = 0;
293 flag = rlen < 0xFFFF ? 1 : 0;
296 bitstream_align(bc);
297 return bitstream_tell(bc) >> 3;
300 static int read_highpass(AVCodecContext *avctx, uint8_t *ptr,
301 int plane, AVFrame *frame)
303 PixletContext *ctx = avctx->priv_data;
304 ptrdiff_t stride = frame->linesize[plane] / 2;
305 int i, ret;
307 for (i = 0; i < ctx->levels * 3; i++) {
308 int32_t a = bytestream2_get_be32(&ctx->gb);
309 int32_t b = bytestream2_get_be32(&ctx->gb);
310 int32_t c = bytestream2_get_be32(&ctx->gb);
311 int32_t d = bytestream2_get_be32(&ctx->gb);
312 int16_t *dest = (int16_t *)frame->data[plane] +
313 ctx->band[plane][i + 1].x +
314 ctx->band[plane][i + 1].y * stride;
315 size_t size = ctx->band[plane][i + 1].size;
316 uint32_t magic = bytestream2_get_be32(&ctx->gb);
318 if (magic != PIXLET_MAGIC) {
319 av_log(avctx, AV_LOG_ERROR,
320 "wrong magic number: 0x%"PRIX32" for plane %d, band %d\n",
321 magic, plane, i);
322 return AVERROR_INVALIDDATA;
325 ret = read_high_coeffs(avctx, ptr + bytestream2_tell(&ctx->gb), dest,
326 size, c, (b >= FFABS(a)) ? b : a, d,
327 ctx->band[plane][i + 1].width, stride);
328 if (ret < 0) {
329 av_log(avctx, AV_LOG_ERROR,
330 "error in highpass coefficients for plane %d, band %d\n",
331 plane, i);
332 return ret;
334 bytestream2_skip(&ctx->gb, ret);
337 return 0;
/*
 * dst[i] = clip_int16(dst[i] + src[i]) for i in [0, len).
 *
 * The index is a size_t so that it matches `len`; an int index would be
 * compared against an unsigned length and overflow for len > INT_MAX.
 */
static void line_add_sat_s16(int16_t *dst, const int16_t *src, size_t len)
{
    size_t i;

    for (i = 0; i < len; i++) {
        int val = dst[i] + src[i];
        dst[i]  = av_clip_int16(val);
    }
}
/*
 * Undo the prediction applied to the low-pass band, in place.
 *
 * `pred` (at least `width` samples) accumulates, with saturation, the stored
 * values of every row seen so far. Each output row is then the running sum
 * of that accumulator from left to right.
 */
static void lowpass_prediction(int16_t *dst, int16_t *pred,
                               size_t width, size_t height, ptrdiff_t stride)
{
    int16_t *row = dst;
    int x, y;

    memset(pred, 0, sizeof(*pred) * width);

    for (y = 0; y < height; y++, row += stride) {
        line_add_sat_s16(pred, row, width);

        row[0] = pred[0];
        for (x = 1; x < width; x++)
            row[x] = pred[x] + row[x - 1];
    }
}
/*
 * One-dimensional synthesis filter, in place.
 *
 * On entry dest[0 .. size/2) holds the low band and dest[size/2 .. size) the
 * high band; on return dest[0 .. size) holds the interleaved even/odd
 * output samples, each scaled by `scale` and clipped to int16.
 *
 * tmp must provide at least size + 16 samples. Both half-bands are copied
 * into it with four mirrored guard samples on either side, so the filter
 * taps can read past the band edges.
 */
static void filterfn(int16_t *dest, int16_t *tmp, size_t size, int64_t scale)
{
    int16_t *low, *high, *ll, *lh, *hl, *hh;
    int hsize, i, j;
    int64_t value;

    hsize = size >> 1;
    low   = tmp + 4;
    high  = &low[hsize + 8];

    /* `size` bytes is size / 2 int16 samples, i.e. exactly one half-band */
    memcpy(low, dest, size);
    memcpy(high, dest + hsize, size);

    /* mirror four samples at both ends of each half-band */
    ll = &low[hsize];
    lh = &low[hsize];
    hl = &high[hsize];
    hh = hl;
    for (i = 4, j = 2; i; i--, j++, ll--, hh++, lh++, hl--) {
        low[i - 5]  = low[j - 1];
        lh[0]       = ll[-1];
        high[i - 5] = high[j - 2];
        hh[0]       = hl[-2];
    }

    /*
     * The scale multiply is done in uint64_t: with a large scale the signed
     * product can exceed the int64_t range, which is undefined. The 32 bits
     * handed to av_clip_int16() are the same either way.
     */
    for (i = 0; i < hsize; i++) {
        value = (int64_t) low [i + 1] * -INT64_C(325392907)  +
                (int64_t) low [i + 0] *  INT64_C(3687786320) +
                (int64_t) low [i - 1] * -INT64_C(325392907)  +
                (int64_t) high[i + 0] *  INT64_C(1518500249) +
                (int64_t) high[i - 1] *  INT64_C(1518500249);
        dest[i * 2] = av_clip_int16(((value >> 32) * (uint64_t)scale) >> 32);
    }

    for (i = 0; i < hsize; i++) {
        value = (int64_t) low [i + 2] * -INT64_C(65078576)   +
                (int64_t) low [i + 1] *  INT64_C(1583578880) +
                (int64_t) low [i + 0] *  INT64_C(1583578880) +
                (int64_t) low [i - 1] * -INT64_C(65078576)   +
                (int64_t) high[i + 1] *  INT64_C(303700064)  +
                (int64_t) high[i + 0] * -INT64_C(3644400640) +
                (int64_t) high[i - 1] *  INT64_C(303700064);
        dest[i * 2 + 1] = av_clip_int16(((value >> 32) * (uint64_t)scale) >> 32);
    }
}
410 static void reconstruction(AVCodecContext *avctx, int16_t *dest,
411 size_t width, size_t height, ptrdiff_t stride,
412 int64_t *scaling_h, int64_t *scaling_v)
414 PixletContext *ctx = avctx->priv_data;
415 unsigned scaled_width, scaled_height;
416 int16_t *ptr, *tmp;
417 int i, j, k;
419 scaled_width = width >> NB_LEVELS;
420 scaled_height = height >> NB_LEVELS;
421 tmp = ctx->filter[0];
423 for (i = 0; i < NB_LEVELS; i++) {
424 int64_t scale_v = scaling_v[i];
425 int64_t scale_h = scaling_h[i];
426 scaled_width <<= 1;
427 scaled_height <<= 1;
429 ptr = dest;
430 for (j = 0; j < scaled_height; j++) {
431 filterfn(ptr, ctx->filter[1], scaled_width, scale_v);
432 ptr += stride;
435 for (j = 0; j < scaled_width; j++) {
436 ptr = dest + j;
437 for (k = 0; k < scaled_height; k++) {
438 tmp[k] = *ptr;
439 ptr += stride;
442 filterfn(tmp, ctx->filter[1], scaled_height, scale_h);
444 ptr = dest + j;
445 for (k = 0; k < scaled_height; k++) {
446 *ptr = tmp[k];
447 ptr += stride;
453 static void postprocess_luma(AVFrame *frame, size_t w, size_t h, int depth)
455 uint16_t *dsty = (uint16_t *)frame->data[0];
456 int16_t *srcy = (int16_t *)frame->data[0];
457 ptrdiff_t stridey = frame->linesize[0] / 2;
458 int i, j;
460 for (j = 0; j < h; j++) {
461 for (i = 0; i < w; i++) {
462 if (srcy[i] <= 0)
463 dsty[i] = 0;
464 else if (srcy[i] > ((1 << depth) - 1))
465 dsty[i] = 65535;
466 else
467 dsty[i] = ((int64_t) srcy[i] * srcy[i] * 65535) /
468 ((1 << depth) - 1) / ((1 << depth) - 1);
470 dsty += stridey;
471 srcy += stridey;
475 static void postprocess_chroma(AVFrame *frame, int w, int h, int depth)
477 uint16_t *dstu = (uint16_t *)frame->data[1];
478 uint16_t *dstv = (uint16_t *)frame->data[2];
479 int16_t *srcu = (int16_t *)frame->data[1];
480 int16_t *srcv = (int16_t *)frame->data[2];
481 ptrdiff_t strideu = frame->linesize[1] / 2;
482 ptrdiff_t stridev = frame->linesize[2] / 2;
483 const unsigned add = 1 << (depth - 1);
484 const unsigned shift = 16 - depth;
485 int i, j;
487 for (j = 0; j < h; j++) {
488 for (i = 0; i < w; i++) {
489 dstu[i] = av_clip_uintp2_c(add + srcu[i], depth) << shift;
490 dstv[i] = av_clip_uintp2_c(add + srcv[i], depth) << shift;
492 dstu += strideu;
493 dstv += stridev;
494 srcu += strideu;
495 srcv += stridev;
499 static int decode_plane(AVCodecContext *avctx, int plane,
500 AVPacket *avpkt, AVFrame *frame)
502 PixletContext *ctx = avctx->priv_data;
503 ptrdiff_t stride = frame->linesize[plane] / 2;
504 unsigned shift = plane > 0;
505 int16_t *dst;
506 int i, ret;
508 for (i = ctx->levels - 1; i >= 0; i--) {
509 int32_t h = sign_extend(bytestream2_get_be32(&ctx->gb), 32);
510 int32_t v = sign_extend(bytestream2_get_be32(&ctx->gb), 32);
512 if (!h || !v)
513 return AVERROR_INVALIDDATA;
515 ctx->scaling[plane][H][i] = (1000000ULL << 32) / h;
516 ctx->scaling[plane][V][i] = (1000000ULL << 32) / v;
519 bytestream2_skip(&ctx->gb, 4);
521 dst = (int16_t *)frame->data[plane];
522 dst[0] = sign_extend(bytestream2_get_be16(&ctx->gb), 16);
524 ret = bitstream_init8(&ctx->bc, avpkt->data + bytestream2_tell(&ctx->gb),
525 bytestream2_get_bytes_left(&ctx->gb));
526 if (ret < 0)
527 return ret;
529 ret = read_low_coeffs(avctx, dst + 1, ctx->band[plane][0].width - 1,
530 ctx->band[plane][0].width - 1, 0);
531 if (ret < 0) {
532 av_log(avctx, AV_LOG_ERROR,
533 "error in lowpass coefficients for plane %d, top row\n", plane);
534 return ret;
537 ret = read_low_coeffs(avctx, dst + stride,
538 ctx->band[plane][0].height - 1, 1, stride);
539 if (ret < 0) {
540 av_log(avctx, AV_LOG_ERROR,
541 "error in lowpass coefficients for plane %d, left column\n",
542 plane);
543 return ret;
546 ret = read_low_coeffs(avctx, dst + stride + 1,
547 (ctx->band[plane][0].width - 1) * (ctx->band[plane][0].height - 1),
548 ctx->band[plane][0].width - 1, stride);
549 if (ret < 0) {
550 av_log(avctx, AV_LOG_ERROR,
551 "error in lowpass coefficients for plane %d, rest\n", plane);
552 return ret;
555 bytestream2_skip(&ctx->gb, ret);
556 if (bytestream2_get_bytes_left(&ctx->gb) <= 0) {
557 av_log(avctx, AV_LOG_ERROR, "no bytes left\n");
558 return AVERROR_INVALIDDATA;
561 ret = read_highpass(avctx, avpkt->data, plane, frame);
562 if (ret < 0)
563 return ret;
565 lowpass_prediction(dst, ctx->prediction, ctx->band[plane][0].width,
566 ctx->band[plane][0].height, stride);
568 reconstruction(avctx, (int16_t *)frame->data[plane], ctx->w >> shift,
569 ctx->h >> shift, stride, ctx->scaling[plane][H],
570 ctx->scaling[plane][V]);
572 return 0;
575 static int pixlet_decode_frame(AVCodecContext *avctx, void *data,
576 int *got_frame, AVPacket *avpkt)
578 PixletContext *ctx = avctx->priv_data;
579 int i, w, h, width, height, ret, version;
580 AVFrame *p = data;
581 ThreadFrame frame = { .f = data };
582 uint32_t pktsize;
584 bytestream2_init(&ctx->gb, avpkt->data, avpkt->size);
586 pktsize = bytestream2_get_be32(&ctx->gb);
587 if (pktsize <= 44 || pktsize - 4 > bytestream2_get_bytes_left(&ctx->gb)) {
588 av_log(avctx, AV_LOG_ERROR, "Invalid packet size %"PRIu32".\n", pktsize);
589 return AVERROR_INVALIDDATA;
592 version = bytestream2_get_le32(&ctx->gb);
593 if (version != 1)
594 avpriv_request_sample(avctx, "Version %d", version);
596 bytestream2_skip(&ctx->gb, 4);
597 if (bytestream2_get_be32(&ctx->gb) != 1)
598 return AVERROR_INVALIDDATA;
599 bytestream2_skip(&ctx->gb, 4);
601 width = bytestream2_get_be32(&ctx->gb);
602 height = bytestream2_get_be32(&ctx->gb);
604 w = FFALIGN(width, 1 << (NB_LEVELS + 1));
605 h = FFALIGN(height, 1 << (NB_LEVELS + 1));
607 ctx->levels = bytestream2_get_be32(&ctx->gb);
608 if (ctx->levels != NB_LEVELS)
609 return AVERROR_INVALIDDATA;
610 ctx->depth = bytestream2_get_be32(&ctx->gb);
611 if (ctx->depth < 8 || ctx->depth > 15) {
612 avpriv_request_sample(avctx, "Depth %d", ctx->depth);
613 return AVERROR_INVALIDDATA;
616 ret = ff_set_dimensions(avctx, w, h);
617 if (ret < 0)
618 return ret;
619 avctx->width = width;
620 avctx->height = height;
622 /* reinit should dimensions change */
623 if (ctx->w != w || ctx->h != h) {
624 pixlet_close(avctx);
625 ctx->w = w;
626 ctx->h = h;
628 ret = init_decoder(avctx);
629 if (ret < 0) {
630 pixlet_close(avctx);
631 ctx->w = 0;
632 ctx->h = 0;
633 return ret;
637 bytestream2_skip(&ctx->gb, 8);
639 ret = ff_thread_get_buffer(avctx, &frame, 0);
640 if (ret < 0)
641 return ret;
643 for (i = 0; i < 3; i++) {
644 ret = decode_plane(avctx, i, avpkt, frame.f);
645 if (ret < 0)
646 return ret;
647 if (avctx->flags & AV_CODEC_FLAG_GRAY)
648 break;
651 postprocess_luma(frame.f, ctx->w, ctx->h, ctx->depth);
652 postprocess_chroma(frame.f, ctx->w >> 1, ctx->h >> 1, ctx->depth);
654 p->pict_type = AV_PICTURE_TYPE_I;
655 p->color_range = AVCOL_RANGE_JPEG;
656 p->key_frame = 1;
658 *got_frame = 1;
660 return pktsize;
#if HAVE_THREADS
/*
 * Thread-copy init callback: clear the buffer pointers and cached dimensions
 * in this context. With w/h at 0, the dimension check in
 * pixlet_decode_frame() allocates fresh buffers on the first frame.
 * NOTE(review): presumably priv_data starts as a copy of the main context,
 * which is why the pointers are dropped rather than freed; confirm.
 */
static int pixlet_init_thread_copy(AVCodecContext *avctx)
{
    PixletContext *s = avctx->priv_data;

    s->w = 0;
    s->h = 0;

    s->prediction = NULL;
    s->filter[1]  = NULL;
    s->filter[0]  = NULL;

    return 0;
}
#endif /* HAVE_THREADS */
678 AVCodec ff_pixlet_decoder = {
679 .name = "pixlet",
680 .long_name = NULL_IF_CONFIG_SMALL("Apple Pixlet"),
681 .type = AVMEDIA_TYPE_VIDEO,
682 .id = AV_CODEC_ID_PIXLET,
683 .init = pixlet_init,
684 .init_thread_copy = ONLY_IF_THREADS_ENABLED(pixlet_init_thread_copy),
685 .close = pixlet_close,
686 .decode = pixlet_decode_frame,
687 .priv_data_size = sizeof(PixletContext),
688 .capabilities = AV_CODEC_CAP_DR1 |
689 AV_CODEC_CAP_FRAME_THREADS,
690 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE |
691 FF_CODEC_CAP_INIT_CLEANUP,