libavcodec/vp3dsp.c

   1 /*
   2  * Copyright (C) 2004 The FFmpeg project
   3  *
   4  * This file is part of Libav.
   5  *
   6  * Libav is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * Libav is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with Libav; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /**
  22  * @file
  23  * Standard C DSP-oriented functions cribbed from the original VP3
  24  * source code.
  25  */
  26
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/intreadwrite.h"
  29 #include "libavutil/common.h"
  30 #include "libavutil/intreadwrite.h"
  31
  32 #include "avcodec.h"
  33 #include "rnd_avg.h"
  34 #include "vp3dsp.h"
  35
  36 #define IdctAdjustBeforeShift 8
  37 #define xC1S7 64277
  38 #define xC2S6 60547
  39 #define xC3S5 54491
  40 #define xC4S4 46341
  41 #define xC5S3 36410
  42 #define xC6S2 25080
  43 #define xC7S1 12785
  44
  45 #define M(a, b) (((a) * (b)) >> 16)
  46
  47 static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
  48                                   int16_t *input, int type)
  49 {
  50     int16_t *ip = input;
  51
  52     int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
  53     int Ed, Gd, Add, Bdd, Fd, Hd;
  54
  55     int i;
  56
  57     /* Inverse DCT on the rows now */
  58     for (i = 0; i < 8; i++) {
  59         /* Check for non-zero values */
  60         if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
  61             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
  62             A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
  63             B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
  64             C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
  65             D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
  66
  67             Ad = M(xC4S4, (A - C));
  68             Bd = M(xC4S4, (B - D));
  69
  70             Cd = A + C;
  71             Dd = B + D;
  72
  73             E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
  74             F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
  75
  76             G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
  77             H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
  78
  79             Ed = E - G;
  80             Gd = E + G;
  81
  82             Add = F + Ad;
  83             Bdd = Bd - H;
  84
  85             Fd = F - Ad;
  86             Hd = Bd + H;
  87
  88             /*  Final sequence of operations over-write original inputs. */
  89             ip[0 * 8] = Gd + Cd;
  90             ip[7 * 8] = Gd - Cd;
  91
  92             ip[1 * 8] = Add + Hd;
  93             ip[2 * 8] = Add - Hd;
  94
  95             ip[3 * 8] = Ed + Dd;
  96             ip[4 * 8] = Ed - Dd;
  97
  98             ip[5 * 8] = Fd + Bdd;
  99             ip[6 * 8] = Fd - Bdd;
 100         }
 101
 102         ip += 1;            /* next row */
 103     }
 104
 105     ip = input;
 106
 107     for (i = 0; i < 8; i++) {
 108         /* Check for non-zero values (bitwise or faster than ||) */
 109         if (ip[1] | ip[2] | ip[3] |
 110             ip[4] | ip[5] | ip[6] | ip[7]) {
 111             A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
 112             B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
 113             C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
 114             D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
 115
 116             Ad = M(xC4S4, (A - C));
 117             Bd = M(xC4S4, (B - D));
 118
 119             Cd = A + C;
 120             Dd = B + D;
 121
 122             E = M(xC4S4, (ip[0] + ip[4])) + 8;
 123             F = M(xC4S4, (ip[0] - ip[4])) + 8;
 124
 125             if (type == 1) { // HACK
 126                 E += 16 * 128;
 127                 F += 16 * 128;
 128             }
 129
 130             G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
 131             H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
 132
 133             Ed = E - G;
 134             Gd = E + G;
 135
 136             Add = F + Ad;
 137             Bdd = Bd - H;
 138
 139             Fd = F - Ad;
 140             Hd = Bd + H;
 141
 142             /* Final sequence of operations over-write original inputs. */
 143             if (type == 1) {
 144                 dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
 145                 dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
 146
 147                 dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
 148                 dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
 149
 150                 dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
 151                 dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
 152
 153                 dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
 154                 dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
 155             } else {
 156                 dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
 157                 dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
 158
 159                 dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
 160                 dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
 161
 162                 dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
 163                 dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
 164
 165                 dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
 166                 dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
 167             }
 168         } else {
 169             if (type == 1) {
 170                 dst[0*stride] =
 171                 dst[1*stride] =
 172                 dst[2*stride] =
 173                 dst[3*stride] =
 174                 dst[4*stride] =
 175                 dst[5*stride] =
 176                 dst[6*stride] =
 177                 dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20));
 178             } else {
 179                 if (ip[0]) {
 180                     int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
 181                     dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v);
 182                     dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v);
 183                     dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v);
 184                     dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v);
 185                     dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v);
 186                     dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v);
 187                     dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v);
 188                     dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v);
 189                 }
 190             }
 191         }
 192
 193         ip += 8;            /* next column */
 194         dst++;
 195     }
 196 }
 197
 198 static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
 199                            int16_t *block /* align 16 */)
 200 {
 201     idct(dest, stride, block, 1);
 202     memset(block, 0, sizeof(*block) * 64);
 203 }
 204
 205 static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
 206                            int16_t *block /* align 16 */)
 207 {
 208     idct(dest, stride, block, 2);
 209     memset(block, 0, sizeof(*block) * 64);
 210 }
 211
 212 static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
 213                               int16_t *block /* align 16 */)
 214 {
 215     int i, dc = (block[0] + 15) >> 5;
 216
 217     for (i = 0; i < 8; i++) {
 218         dest[0] = av_clip_uint8(dest[0] + dc);
 219         dest[1] = av_clip_uint8(dest[1] + dc);
 220         dest[2] = av_clip_uint8(dest[2] + dc);
 221         dest[3] = av_clip_uint8(dest[3] + dc);
 222         dest[4] = av_clip_uint8(dest[4] + dc);
 223         dest[5] = av_clip_uint8(dest[5] + dc);
 224         dest[6] = av_clip_uint8(dest[6] + dc);
 225         dest[7] = av_clip_uint8(dest[7] + dc);
 226         dest   += stride;
 227     }
 228     block[0] = 0;
 229 }
 230
 231 static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
 232                                 int *bounding_values)
 233 {
 234     unsigned char *end;
 235     int filter_value;
 236     const ptrdiff_t nstride = -stride;
 237
 238     for (end = first_pixel + 8; first_pixel < end; first_pixel++) {
 239         filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
 240                        (first_pixel[0] - first_pixel[nstride]) * 3;
 241         filter_value = bounding_values[(filter_value + 4) >> 3];
 242
 243         first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
 244         first_pixel[0]       = av_clip_uint8(first_pixel[0] - filter_value);
 245     }
 246 }
 247
 248 static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
 249                                 int *bounding_values)
 250 {
 251     unsigned char *end;
 252     int filter_value;
 253
 254     for (end = first_pixel + 8 * stride; first_pixel != end; first_pixel += stride) {
 255         filter_value = (first_pixel[-2] - first_pixel[1]) +
 256                        (first_pixel[ 0] - first_pixel[-1]) * 3;
 257         filter_value = bounding_values[(filter_value + 4) >> 3];
 258
 259         first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
 260         first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
 261     }
 262 }
 263
 264 static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
 265                                  const uint8_t *src2, ptrdiff_t stride, int h)
 266 {
 267     int i;
 268
 269     for (i = 0; i < h; i++) {
 270         uint32_t a, b;
 271
 272         a = AV_RN32(&src1[i * stride]);
 273         b = AV_RN32(&src2[i * stride]);
 274         AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));
 275         a = AV_RN32(&src1[i * stride + 4]);
 276         b = AV_RN32(&src2[i * stride + 4]);
 277         AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
 278     }
 279 }
 280
 281 av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
 282 {
 283     c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2;
 284
 285     c->idct_put      = vp3_idct_put_c;
 286     c->idct_add      = vp3_idct_add_c;
 287     c->idct_dc_add   = vp3_idct_dc_add_c;
 288     c->v_loop_filter = vp3_v_loop_filter_c;
 289     c->h_loop_filter = vp3_h_loop_filter_c;
 290
 291     if (ARCH_ARM)
 292         ff_vp3dsp_init_arm(c, flags);
 293     if (ARCH_PPC)
 294         ff_vp3dsp_init_ppc(c, flags);
 295     if (ARCH_X86)
 296         ff_vp3dsp_init_x86(c, flags);
 297 }