hvirtual/quicktime/libavcodec/imgconvert.c

   1 /*
   2  * Misc image conversion routines
   3  * Copyright (c) 2001, 2002 Fabrice Bellard.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  */
  19 #include "avcodec.h"
  20 #include "dsputil.h"
  21
  22 #ifdef USE_FASTMEMCPY
  23 #include "fastmemcpy.h"
  24 #endif
  25
  26 #ifdef HAVE_MMX
  27 #include "i386/mmx.h"
  28 #endif
  29 /* XXX: totally non optimized */
  30
  31 static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  32                               UINT8 *src, int width, int height)
  33 {
  34     int x, y;
  35     UINT8 *p = src;
  36
  37     for(y=0;y<height;y+=2) {
  38         for(x=0;x<width;x+=2) {
  39             lum[0] = p[0];
  40             cb[0] = p[1];
  41             lum[1] = p[2];
  42             cr[0] = p[3];
  43             p += 4;
  44             lum += 2;
  45             cb++;
  46             cr++;
  47         }
  48         for(x=0;x<width;x+=2) {
  49             lum[0] = p[0];
  50             lum[1] = p[2];
  51             p += 4;
  52             lum += 2;
  53         }
  54     }
  55 }
  56
  57 #define SCALEBITS 8
  58 #define ONE_HALF  (1 << (SCALEBITS - 1))
  59 #define FIX(x)          ((int) ((x) * (1L<<SCALEBITS) + 0.5))
  60
  61 static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  62                               UINT8 *src, int width, int height)
  63 {
  64     int wrap, wrap3, x, y;
  65     int r, g, b, r1, g1, b1;
  66     UINT8 *p;
  67
  68     wrap = width;
  69     wrap3 = width * 3;
  70     p = src;
  71     for(y=0;y<height;y+=2) {
  72         for(x=0;x<width;x+=2) {
  73             r = p[0];
  74             g = p[1];
  75             b = p[2];
  76             r1 = r;
  77             g1 = g;
  78             b1 = b;
  79             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  80                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  81             r = p[3];
  82             g = p[4];
  83             b = p[5];
  84             r1 += r;
  85             g1 += g;
  86             b1 += b;
  87             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  88                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  89             p += wrap3;
  90             lum += wrap;
  91
  92             r = p[0];
  93             g = p[1];
  94             b = p[2];
  95             r1 += r;
  96             g1 += g;
  97             b1 += b;
  98             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  99                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 100             r = p[3];
 101             g = p[4];
 102             b = p[5];
 103             r1 += r;
 104             g1 += g;
 105             b1 += b;
 106             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 107                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 108
 109             cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
 110                       FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 111             cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
 112                      FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 113
 114             cb++;
 115             cr++;
 116             p += -wrap3 + 2 * 3;
 117             lum += -wrap + 2;
 118         }
 119         p += wrap3;
 120         lum += wrap;
 121     }
 122 }
 123
 124 static void rgba32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
 125                               UINT8 *src, int width, int height)
 126 {
 127     int wrap, wrap4, x, y;
 128     int r, g, b, r1, g1, b1;
 129     UINT8 *p;
 130
 131     wrap = width;
 132     wrap4 = width * 4;
 133     p = src;
 134     for(y=0;y<height;y+=2) {
 135         for(x=0;x<width;x+=2) {
 136             r = p[0];
 137             g = p[1];
 138             b = p[2];
 139             r1 = r;
 140             g1 = g;
 141             b1 = b;
 142             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 143                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 144             r = p[4];
 145             g = p[5];
 146             b = p[6];
 147             r1 += r;
 148             g1 += g;
 149             b1 += b;
 150             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 151                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 152             p += wrap4;
 153             lum += wrap;
 154
 155             r = p[0];
 156             g = p[1];
 157             b = p[2];
 158             r1 += r;
 159             g1 += g;
 160             b1 += b;
 161             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 162                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 163             r = p[4];
 164             g = p[5];
 165             b = p[6];
 166             r1 += r;
 167             g1 += g;
 168             b1 += b;
 169             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 170                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 171
 172             cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
 173                       FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 174             cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
 175                      FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 176
 177             cb++;
 178             cr++;
 179             p += -wrap4 + 2 * 4;
 180             lum += -wrap + 2;
 181         }
 182         p += wrap4;
 183         lum += wrap;
 184     }
 185 }
 186
 187 #define rgb565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0020,63,0x0001,31)
 188 #define rgb555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0400,31, 0x0020,31,0x0001,31)
 189 #define rgb5551_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0040,31,0x0002,31)
 190 #define bgr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,63,0x0800,31)
 191 #define bgr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,31,0x0400,31)
 192 #define gbr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0800,31,0x0040,63)
 193 #define gbr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0400,31,0x0020,31)
 194
 195 static void rgbmisc_to_yuv420p
 196   (UINT8 *lum, UINT8 *cb, UINT8 *cr,
 197    UINT8 *src, int width, int height,
 198
 199    UINT16 R_LOWMASK, UINT16 R_MAX,
 200    UINT16 G_LOWMASK, UINT16 G_MAX,
 201    UINT16 B_LOWMASK, UINT16 B_MAX
 202   )
 203 {
 204     int wrap, wrap2, x, y;
 205     int r, g, b, r1, g1, b1;
 206     UINT8 *p;
 207     UINT16 pixel;
 208
 209     wrap = width;
 210     wrap2 = width * 2;
 211     p = src;
 212     for(y=0;y<height;y+=2) {
 213         for(x=0;x<width;x+=2) {
 214             pixel = p[0] | (p[1]<<8);
 215             r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
 216             g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
 217             b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
 218             r1 = r;
 219             g1 = g;
 220             b1 = b;
 221             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 222                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 223
 224             pixel = p[2] | (p[3]<<8);
 225             r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
 226             g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
 227             b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
 228             r1 += r;
 229             g1 += g;
 230             b1 += b;
 231             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 232                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 233             p += wrap2;
 234             lum += wrap;
 235
 236             pixel = p[0] | (p[1]<<8);
 237             r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
 238             g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
 239             b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
 240             r1 += r;
 241             g1 += g;
 242             b1 += b;
 243             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 244                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 245             pixel = p[2] | (p[3]<<8);
 246             r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
 247             g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
 248             b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
 249             r1 += r;
 250             g1 += g;
 251             b1 += b;
 252             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 253                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 254
 255             cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
 256                       FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 257             cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
 258                      FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 259
 260             cb++;
 261             cr++;
 262             p += -wrap2 + 2 * 2;
 263             lum += -wrap + 2;
 264         }
 265         p += wrap2;
 266         lum += wrap;
 267     }
 268 }
 269
 270
 271 static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
 272                               UINT8 *src, int width, int height)
 273 {
 274     int wrap, wrap3, x, y;
 275     int r, g, b, r1, g1, b1;
 276     UINT8 *p;
 277
 278     wrap = width;
 279     wrap3 = width * 3;
 280     p = src;
 281     for(y=0;y<height;y+=2) {
 282         for(x=0;x<width;x+=2) {
 283             b = p[0];
 284             g = p[1];
 285             r = p[2];
 286             r1 = r;
 287             g1 = g;
 288             b1 = b;
 289             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 290                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 291             b = p[3];
 292             g = p[4];
 293             r = p[5];
 294             r1 += r;
 295             g1 += g;
 296             b1 += b;
 297             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 298                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 299             p += wrap3;
 300             lum += wrap;
 301
 302             b = p[0];
 303             g = p[1];
 304             r = p[2];
 305             r1 += r;
 306             g1 += g;
 307             b1 += b;
 308             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 309                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 310             b = p[3];
 311             g = p[4];
 312             r = p[5];
 313             r1 += r;
 314             g1 += g;
 315             b1 += b;
 316             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 317                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 318
 319             cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
 320                       FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 321             cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
 322                      FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 323
 324             cb++;
 325             cr++;
 326             p += -wrap3 + 2 * 3;
 327             lum += -wrap + 2;
 328         }
 329         p += wrap3;
 330         lum += wrap;
 331     }
 332 }
 333
 334 static void bgra32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
 335                               UINT8 *src, int width, int height)
 336 {
 337     int wrap, wrap4, x, y;
 338     int r, g, b, r1, g1, b1;
 339     UINT8 *p;
 340
 341     wrap = width;
 342     wrap4 = width * 4;
 343     p = src;
 344     for(y=0;y<height;y+=2) {
 345         for(x=0;x<width;x+=2) {
 346             b = p[0];
 347             g = p[1];
 348             r = p[2];
 349             r1 = r;
 350             g1 = g;
 351             b1 = b;
 352             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 353                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 354             b = p[4];
 355             g = p[5];
 356             r = p[6];
 357             r1 += r;
 358             g1 += g;
 359             b1 += b;
 360             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 361                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 362             p += wrap4;
 363             lum += wrap;
 364
 365             b = p[0];
 366             g = p[1];
 367             r = p[2];
 368             r1 += r;
 369             g1 += g;
 370             b1 += b;
 371             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
 372                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 373             b = p[4];
 374             g = p[5];
 375             r = p[6];
 376             r1 += r;
 377             g1 += g;
 378             b1 += b;
 379             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
 380                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
 381
 382             cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
 383                       FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 384             cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
 385                      FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
 386
 387             cb++;
 388             cr++;
 389             p += -wrap4 + 2 * 4;
 390             lum += -wrap + 2;
 391         }
 392         p += wrap4;
 393         lum += wrap;
 394     }
 395 }
 396
 397 /* XXX: use generic filter ? */
 398 /* 1x2 -> 1x1 */
 399 static void shrink2(UINT8 *dst, int dst_wrap,
 400                     UINT8 *src, int src_wrap,
 401                     int width, int height)
 402 {
 403     int w;
 404     UINT8 *s1, *s2, *d;
 405
 406     for(;height > 0; height--) {
 407         s1 = src;
 408         s2 = s1 + src_wrap;
 409         d = dst;
 410         for(w = width;w >= 4; w-=4) {
 411             d[0] = (s1[0] + s2[0]) >> 1;
 412             d[1] = (s1[1] + s2[1]) >> 1;
 413             d[2] = (s1[2] + s2[2]) >> 1;
 414             d[3] = (s1[3] + s2[3]) >> 1;
 415             s1 += 4;
 416             s2 += 4;
 417             d += 4;
 418         }
 419         for(;w > 0; w--) {
 420             d[0] = (s1[0] + s2[0]) >> 1;
 421             s1++;
 422             s2++;
 423             d++;
 424         }
 425         src += 2 * src_wrap;
 426         dst += dst_wrap;
 427     }
 428 }
 429
 430 /* 2x2 -> 1x1 */
 431 static void shrink22(UINT8 *dst, int dst_wrap,
 432                      UINT8 *src, int src_wrap,
 433                      int width, int height)
 434 {
 435     int w;
 436     UINT8 *s1, *s2, *d;
 437
 438     for(;height > 0; height--) {
 439         s1 = src;
 440         s2 = s1 + src_wrap;
 441         d = dst;
 442         for(w = width;w >= 4; w-=4) {
 443             d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 1;
 444             d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 1;
 445             d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 1;
 446             d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 1;
 447             s1 += 8;
 448             s2 += 8;
 449             d += 4;
 450         }
 451         for(;w > 0; w--) {
 452             d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 1;
 453             s1 += 2;
 454             s2 += 2;
 455             d++;
 456         }
 457         src += 2 * src_wrap;
 458         dst += dst_wrap;
 459     }
 460 }
 461
 462 /* 1x1 -> 2x2 */
 463 static void grow22(UINT8 *dst, int dst_wrap,
 464                      UINT8 *src, int src_wrap,
 465                      int width, int height)
 466 {
 467     int w;
 468     UINT8 *s1, *d;
 469
 470     for(;height > 0; height--) {
 471         s1 = src;
 472         d = dst;
 473         for(w = width;w >= 4; w-=4) {
 474             d[1] = d[0] = s1[0];
 475             d[3] = d[2] = s1[1];
 476             s1 += 2;
 477             d += 4;
 478         }
 479         for(;w > 0; w--) {
 480             d[0] = s1[0];
 481             s1 ++;
 482             d++;
 483         }
 484         if (height%2)
 485             src += src_wrap;
 486         dst += dst_wrap;
 487     }
 488 }
 489
 490 /* 1x2 -> 2x1. width and height are given for the source picture */
 491 static void conv411(UINT8 *dst, int dst_wrap,
 492                     UINT8 *src, int src_wrap,
 493                     int width, int height)
 494 {
 495     int w, c;
 496     UINT8 *s1, *s2, *d;
 497
 498     for(;height > 0; height -= 2) {
 499         s1 = src;
 500         s2 = src + src_wrap;
 501         d = dst;
 502         for(w = width;w > 0; w--) {
 503             c = (s1[0] + s2[0]) >> 1;
 504             d[0] = c;
 505             d[1] = c;
 506             s1++;
 507             s2++;
 508             d += 2;
 509         }
 510         src += src_wrap * 2;
 511         dst += dst_wrap;
 512     }
 513 }
 514
 515 static void img_copy(UINT8 *dst, int dst_wrap,
 516                      UINT8 *src, int src_wrap,
 517                      int width, int height)
 518 {
 519     for(;height > 0; height--) {
 520         memcpy(dst, src, width);
 521         dst += dst_wrap;
 522         src += src_wrap;
 523     }
 524 }
 525
 526 #define SCALE_BITS 10
 527
 528 #define C_Y  (76309 >> (16 - SCALE_BITS))
 529 #define C_RV (117504 >> (16 - SCALE_BITS))
 530 #define C_BU (138453 >> (16 - SCALE_BITS))
 531 #define C_GU (13954 >> (16 - SCALE_BITS))
 532 #define C_GV (34903 >> (16 - SCALE_BITS))
 533
 534 #define RGBOUT(r, g, b, y1)\
 535 {\
 536     y = (y1 - 16) * C_Y;\
 537     r = cm[(y + r_add) >> SCALE_BITS];\
 538     g = cm[(y + g_add) >> SCALE_BITS];\
 539     b = cm[(y + b_add) >> SCALE_BITS];\
 540 }
 541
 542 /* XXX: no chroma interpolating is done */
 543 static void yuv420p_to_bgra32(AVPicture *dst, AVPicture *src,
 544                              int width, int height)
 545 {
 546     UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
 547     int w, y, cb, cr, r_add, g_add, b_add, width2;
 548     UINT8 *cm = cropTbl + MAX_NEG_CROP;
 549
 550     d = dst->data[0];
 551     y1_ptr = src->data[0];
 552     cb_ptr = src->data[1];
 553     cr_ptr = src->data[2];
 554     width2 = width >> 1;
 555     for(;height > 0; height -= 2) {
 556         d1 = d;
 557         d2 = d + dst->linesize[0];
 558         y2_ptr = y1_ptr + src->linesize[0];
 559         for(w = width2; w > 0; w --) {
 560             cb = cb_ptr[0] - 128;
 561             cr = cr_ptr[0] - 128;
 562             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
 563             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
 564             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
 565
 566             /* output 4 pixels */
 567             RGBOUT(d1[2], d1[1], d1[0], y1_ptr[0]);
 568             RGBOUT(d1[6], d1[5], d1[4], y1_ptr[1]);
 569             RGBOUT(d2[2], d2[1], d2[0], y2_ptr[0]);
 570             RGBOUT(d2[6], d2[5], d2[4], y2_ptr[1]);
 571
 572             d1[3] = d1[7] = d2[3] = d2[7] = 255;
 573
 574             d1 += 8;
 575             d2 += 8;
 576             y1_ptr += 2;
 577             y2_ptr += 2;
 578             cb_ptr++;
 579             cr_ptr++;
 580         }
 581         d += 2 * dst->linesize[0];
 582         y1_ptr += 2 * src->linesize[0] - width;
 583         cb_ptr += src->linesize[1] - width2;
 584         cr_ptr += src->linesize[2] - width2;
 585     }
 586 }
 587
 588 /* XXX: no chroma interpolating is done */
 589 static void yuv420p_to_rgba32(AVPicture *dst, AVPicture *src,
 590                              int width, int height)
 591 {
 592     UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
 593     int w, y, cb, cr, r_add, g_add, b_add, width2;
 594     UINT8 *cm = cropTbl + MAX_NEG_CROP;
 595
 596     d = dst->data[0];
 597     y1_ptr = src->data[0];
 598     cb_ptr = src->data[1];
 599     cr_ptr = src->data[2];
 600     width2 = width >> 1;
 601     for(;height > 0; height -= 2) {
 602         d1 = d;
 603         d2 = d + dst->linesize[0];
 604         y2_ptr = y1_ptr + src->linesize[0];
 605         for(w = width2; w > 0; w --) {
 606             cb = cb_ptr[0] - 128;
 607             cr = cr_ptr[0] - 128;
 608             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
 609             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
 610             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
 611
 612             /* output 4 pixels */
 613             RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
 614             RGBOUT(d1[4], d1[5], d1[6], y1_ptr[1]);
 615             RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
 616             RGBOUT(d2[4], d2[5], d2[6], y2_ptr[1]);
 617
 618             d1[3] = d1[7] = d2[3] = d2[7] = 255;
 619
 620             d1 += 8;
 621             d2 += 8;
 622             y1_ptr += 2;
 623             y2_ptr += 2;
 624             cb_ptr++;
 625             cr_ptr++;
 626         }
 627         d += 2 * dst->linesize[0];
 628         y1_ptr += 2 * src->linesize[0] - width;
 629         cb_ptr += src->linesize[1] - width2;
 630         cr_ptr += src->linesize[2] - width2;
 631     }
 632 }
 633
 634 /* XXX: no chroma interpolating is done */
 635 static void yuv420p_to_rgb24(AVPicture *dst, AVPicture *src,
 636                              int width, int height)
 637 {
 638     UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
 639     int w, y, cb, cr, r_add, g_add, b_add, width2;
 640     UINT8 *cm = cropTbl + MAX_NEG_CROP;
 641
 642     d = dst->data[0];
 643     y1_ptr = src->data[0];
 644     cb_ptr = src->data[1];
 645     cr_ptr = src->data[2];
 646     width2 = width >> 1;
 647     for(;height > 0; height -= 2) {
 648         d1 = d;
 649         d2 = d + dst->linesize[0];
 650         y2_ptr = y1_ptr + src->linesize[0];
 651         for(w = width2; w > 0; w --) {
 652             cb = cb_ptr[0] - 128;
 653             cr = cr_ptr[0] - 128;
 654             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
 655             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
 656             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
 657
 658             /* output 4 pixels */
 659             RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
 660             RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
 661             RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
 662             RGBOUT(d2[3], d2[4], d2[5], y2_ptr[1]);
 663
 664             d1 += 6;
 665             d2 += 6;
 666             y1_ptr += 2;
 667             y2_ptr += 2;
 668             cb_ptr++;
 669             cr_ptr++;
 670         }
 671         d += 2 * dst->linesize[0];
 672         y1_ptr += 2 * src->linesize[0] - width;
 673         cb_ptr += src->linesize[1] - width2;
 674         cr_ptr += src->linesize[2] - width2;
 675     }
 676 }
 677
 678 /* XXX: no chroma interpolating is done */
 679 static void yuv422p_to_rgb24(AVPicture *dst, AVPicture *src,
 680                              int width, int height)
 681 {
 682     UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1;
 683     int w, y, cb, cr, r_add, g_add, b_add, width2;
 684     UINT8 *cm = cropTbl + MAX_NEG_CROP;
 685
 686     d = dst->data[0];
 687     y1_ptr = src->data[0];
 688     cb_ptr = src->data[1];
 689     cr_ptr = src->data[2];
 690     width2 = width >> 1;
 691     for(;height > 0; height --) {
 692         d1 = d;
 693         for(w = width2; w > 0; w --) {
 694             cb = cb_ptr[0] - 128;
 695             cr = cr_ptr[0] - 128;
 696             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
 697             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
 698             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
 699
 700             /* output 2 pixels */
 701             RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
 702             RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
 703
 704             d1 += 6;
 705             y1_ptr += 2;
 706             cb_ptr++;
 707             cr_ptr++;
 708         }
 709         d += dst->linesize[0];
 710         y1_ptr += src->linesize[0] - width;
 711         cb_ptr += src->linesize[1] - width2;
 712         cr_ptr += src->linesize[2] - width2;
 713     }
 714 }
 715
 716 /* XXX: always use linesize. Return -1 if not supported */
 717 int img_convert(AVPicture *dst, int dst_pix_fmt,
 718                 AVPicture *src, int pix_fmt,
 719                 int width, int height)
 720 {
 721     int i;
 722
 723     assert(pix_fmt != PIX_FMT_ANY && dst_pix_fmt != PIX_FMT_ANY);
 724
 725     if (dst_pix_fmt == pix_fmt) {
 726         switch(pix_fmt) {
 727         case PIX_FMT_YUV420P:
 728             for(i=0;i<3;i++) {
 729                 if (i == 1) {
 730                     width >>= 1;
 731                     height >>= 1;
 732                 }
 733                 img_copy(dst->data[i], dst->linesize[i],
 734                          src->data[i], src->linesize[i],
 735                          width, height);
 736             }
 737             break;
 738         default:
 739             return -1;
 740         }
 741     } else if (dst_pix_fmt == PIX_FMT_YUV420P) {
 742
 743         switch(pix_fmt) {
 744         case PIX_FMT_YUV411P:
 745             img_copy(dst->data[0], dst->linesize[0],
 746                      src->data[0], src->linesize[0],
 747                      width, height);
 748             conv411(dst->data[1], dst->linesize[1],
 749                     src->data[1], src->linesize[1],
 750                     width / 4, height);
 751             conv411(dst->data[2], dst->linesize[2],
 752                     src->data[2], src->linesize[2],
 753                     width / 4, height);
 754             break;
 755         case PIX_FMT_YUV410P:
 756             img_copy(dst->data[0], dst->linesize[0],
 757                      src->data[0], src->linesize[0],
 758                      width, height);
 759             grow22(dst->data[1], dst->linesize[1],
 760                      src->data[1], src->linesize[1],
 761                      width/2, height/2);
 762             grow22(dst->data[2], dst->linesize[2],
 763                      src->data[2], src->linesize[2],
 764                      width/2, height/2);
 765             break;
 766         case PIX_FMT_YUV420P:
 767             for(i=0;i<3;i++) {
 768                 img_copy(dst->data[i], dst->linesize[i],
 769                          src->data[i], src->linesize[i],
 770                          width, height);
 771             }
 772             break;
 773         case PIX_FMT_YUV422P:
 774             img_copy(dst->data[0], dst->linesize[0],
 775                      src->data[0], src->linesize[0],
 776                      width, height);
 777             width >>= 1;
 778             height >>= 1;
 779             for(i=1;i<3;i++) {
 780                 shrink2(dst->data[i], dst->linesize[i],
 781                         src->data[i], src->linesize[i],
 782                         width, height);
 783             }
 784             break;
 785         case PIX_FMT_YUV444P:
 786             img_copy(dst->data[0], dst->linesize[0],
 787                      src->data[0], src->linesize[0],
 788                      width, height);
 789             width >>= 1;
 790             height >>= 1;
 791             for(i=1;i<3;i++) {
 792                 shrink22(dst->data[i], dst->linesize[i],
 793                          src->data[i], src->linesize[i],
 794                          width, height);
 795             }
 796             break;
 797         case PIX_FMT_YUV422:
 798             yuv422_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 799                               src->data[0], width, height);
 800             break;
 801         case PIX_FMT_RGB24:
 802             rgb24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 803                              src->data[0], width, height);
 804             break;
 805         case PIX_FMT_RGBA32:
 806             rgba32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 807                              src->data[0], width, height);
 808             break;
 809         case PIX_FMT_BGR24:
 810             bgr24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 811                              src->data[0], width, height);
 812             break;
 813         case PIX_FMT_BGRA32:
 814             bgra32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 815                              src->data[0], width, height);
 816             break;
 817         case PIX_FMT_RGB565:
 818             rgb565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 819                              src->data[0], width, height);
 820             break;
 821         case PIX_FMT_RGB555:
 822             rgb555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 823                              src->data[0], width, height);
 824             break;
 825 /*        case PIX_FMT_RGB5551:
 826             rgb5551_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 827                              src->data[0], width, height);
 828             break;*/
 829         case PIX_FMT_BGR565:
 830             bgr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 831                              src->data[0], width, height);
 832             break;
 833         case PIX_FMT_BGR555:
 834             bgr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 835                              src->data[0], width, height);
 836             break;
 837 /*        case PIX_FMT_GBR565:
 838             gbr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 839                              src->data[0], width, height);
 840             break;
 841         case PIX_FMT_GBR555:
 842             gbr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
 843                              src->data[0], width, height);
 844             break;*/
 845         default:
 846             return -1;
 847         }
 848     } else if (dst_pix_fmt == PIX_FMT_RGB24) {
 849         switch(pix_fmt) {
 850         case PIX_FMT_YUV420P:
 851             yuv420p_to_rgb24(dst, src, width, height);
 852             break;
 853         case PIX_FMT_YUV422P:
 854             yuv422p_to_rgb24(dst, src, width, height);
 855             break;
 856         default:
 857             return -1;
 858         }
 859     } else if (dst_pix_fmt == PIX_FMT_RGBA32) {
 860         switch(pix_fmt) {
 861         case PIX_FMT_YUV420P:
 862             yuv420p_to_rgba32(dst, src, width, height);
 863             break;
 864         default:
 865             return -1;
 866         }
 867     } else if (dst_pix_fmt == PIX_FMT_BGRA32) {
 868         switch(pix_fmt) {
 869         case PIX_FMT_YUV420P:
 870             yuv420p_to_bgra32(dst, src, width, height);
 871             break;
 872         default:
 873             return -1;
 874         }
 875     } else {
 876         return -1;
 877     }
 878     return 0;
 879 }
 880
 881
#ifdef HAVE_MMX
/*
 * MMX version of one step of deinterlace_line_inplace(): filters 4
 * adjacent pixels with the vertical kernel [-1 4 2 4 -1] / 8.
 *
 * Uses the pointers lum_m4, lum_m3, lum_m2, lum_m1 and lum of the
 * enclosing scope (4 bytes are read from each) and clobbers mm0-mm4.
 * Expects mm7 to be zero and mm6 to hold four 16-bit words equal to 4
 * (the rounding term); avpicture_deinterlace() loads both.
 *
 * Reading the wrappers as op(source, destination) -- NOTE(review):
 * operand order comes from i386/mmx.h, confirm there -- the sequence is:
 *   - widen the five 4-byte groups to 16-bit words (unpack against mm7),
 *   - store the still unfiltered lum_m2 bytes into lum_m4, matching the
 *     "lum_m4[0]=lum_m2[0]" of the C version,
 *   - mm1 = 4*(lum_m3 + lum_m1) + 2*lum_m2 + 4,
 *   - subtract (lum_m4 + lum) with unsigned saturation, shift right by 3,
 *   - pack back to bytes and store the 4 results over lum_m2.
 *
 * The expansion ends with ';', so it is used without a trailing semicolon.
 */
#define DEINT_INPLACE_LINE_LUM \
                    movd_m2r(lum_m4[0],mm0);\
                    movd_m2r(lum_m3[0],mm1);\
                    movd_m2r(lum_m2[0],mm2);\
                    movd_m2r(lum_m1[0],mm3);\
                    movd_m2r(lum[0],mm4);\
                    punpcklbw_r2r(mm7,mm0);\
                    movd_r2m(mm2,lum_m4[0]);\
                    punpcklbw_r2r(mm7,mm1);\
                    punpcklbw_r2r(mm7,mm2);\
                    punpcklbw_r2r(mm7,mm3);\
                    punpcklbw_r2r(mm7,mm4);\
                    paddw_r2r(mm3,mm1);\
                    psllw_i2r(1,mm2);\
                    paddw_r2r(mm4,mm0);\
                    psllw_i2r(2,mm1);\
                    paddw_r2r(mm6,mm2);\
                    paddw_r2r(mm2,mm1);\
                    psubusw_r2r(mm0,mm1);\
                    psrlw_i2r(3,mm1);\
                    packuswb_r2r(mm7,mm1);\
                    movd_r2m(mm1,lum_m2[0]);

/*
 * MMX version of one step of deinterlace_line(): same arithmetic as
 * DEINT_INPLACE_LINE_LUM, but no input line is modified and the 4
 * filtered pixels are stored to dst[0..3].
 *
 * Uses dst, lum_m4, lum_m3, lum_m2, lum_m1 and lum of the enclosing
 * scope, clobbers mm0-mm4 and has the same mm6 / mm7 requirements.
 * The expansion ends with ';', so it is used without a trailing semicolon.
 */
#define DEINT_LINE_LUM \
                    movd_m2r(lum_m4[0],mm0);\
                    movd_m2r(lum_m3[0],mm1);\
                    movd_m2r(lum_m2[0],mm2);\
                    movd_m2r(lum_m1[0],mm3);\
                    movd_m2r(lum[0],mm4);\
                    punpcklbw_r2r(mm7,mm0);\
                    punpcklbw_r2r(mm7,mm1);\
                    punpcklbw_r2r(mm7,mm2);\
                    punpcklbw_r2r(mm7,mm3);\
                    punpcklbw_r2r(mm7,mm4);\
                    paddw_r2r(mm3,mm1);\
                    psllw_i2r(1,mm2);\
                    paddw_r2r(mm4,mm0);\
                    psllw_i2r(2,mm1);\
                    paddw_r2r(mm6,mm2);\
                    paddw_r2r(mm2,mm1);\
                    psubusw_r2r(mm0,mm1);\
                    psrlw_i2r(3,mm1);\
                    packuswb_r2r(mm7,mm1);\
                    movd_r2m(mm1,dst[0]);
#endif
 928
 929 /* filter parameters: [-1 4 2 4 -1] // 8 */
 930 static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
 931                                 int size)
 932 {
 933 #ifndef HAVE_MMX
 934     UINT8 *cm = cropTbl + MAX_NEG_CROP;
 935     int sum;
 936
 937     for(;size > 0;size--) {
 938         sum = -lum_m4[0];
 939         sum += lum_m3[0] << 2;
 940         sum += lum_m2[0] << 1;
 941         sum += lum_m1[0] << 2;
 942         sum += -lum[0];
 943         dst[0] = cm[(sum + 4) >> 3];
 944         lum_m4++;
 945         lum_m3++;
 946         lum_m2++;
 947         lum_m1++;
 948         lum++;
 949         dst++;
 950     }
 951 #else
 952
 953     for (;size > 3; size-=4) {
 954         DEINT_LINE_LUM
 955         lum_m4+=4;
 956         lum_m3+=4;
 957         lum_m2+=4;
 958         lum_m1+=4;
 959         lum+=4;
 960         dst+=4;
 961     }
 962 #endif
 963 }
 964 static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
 965                              int size)
 966 {
 967 #ifndef HAVE_MMX
 968     UINT8 *cm = cropTbl + MAX_NEG_CROP;
 969     int sum;
 970
 971     for(;size > 0;size--) {
 972         sum = -lum_m4[0];
 973         sum += lum_m3[0] << 2;
 974         sum += lum_m2[0] << 1;
 975         lum_m4[0]=lum_m2[0];
 976         sum += lum_m1[0] << 2;
 977         sum += -lum[0];
 978         lum_m2[0] = cm[(sum + 4) >> 3];
 979         lum_m4++;
 980         lum_m3++;
 981         lum_m2++;
 982         lum_m1++;
 983         lum++;
 984     }
 985 #else
 986
 987     for (;size > 3; size-=4) {
 988         DEINT_INPLACE_LINE_LUM
 989         lum_m4+=4;
 990         lum_m3+=4;
 991         lum_m2+=4;
 992         lum_m1+=4;
 993         lum+=4;
 994     }
 995 #endif
 996 }
 997
 998 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
 999    top field is copied as is, but the bottom field is deinterlaced
1000    against the top field. */
1001 static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
1002                                     UINT8 *src1, int src_wrap,
1003                                     int width, int height)
1004 {
1005     UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
1006     int y;
1007
1008     src_m2 = src1;
1009     src_m1 = src1;
1010     src_0=&src_m1[src_wrap];
1011     src_p1=&src_0[src_wrap];
1012     src_p2=&src_p1[src_wrap];
1013     for(y=0;y<(height-2);y+=2) {
1014         memcpy(dst,src_m1,width);
1015         dst += dst_wrap;
1016         deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
1017         src_m2 = src_0;
1018         src_m1 = src_p1;
1019         src_0 = src_p2;
1020         src_p1 += 2*src_wrap;
1021         src_p2 += 2*src_wrap;
1022         dst += dst_wrap;
1023     }
1024     memcpy(dst,src_m1,width);
1025     dst += dst_wrap;
1026     /* do last line */
1027     deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
1028 }
1029
1030 static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap,
1031                                      int width, int height)
1032 {
1033     UINT8 *src_m1, *src_0, *src_p1, *src_p2;
1034     int y;
1035     UINT8 *buf;
1036     buf = (UINT8*)av_malloc(width);
1037
1038     src_m1 = src1;
1039     memcpy(buf,src_m1,width);
1040     src_0=&src_m1[src_wrap];
1041     src_p1=&src_0[src_wrap];
1042     src_p2=&src_p1[src_wrap];
1043     for(y=0;y<(height-2);y+=2) {
1044         deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
1045         src_m1 = src_p1;
1046         src_0 = src_p2;
1047         src_p1 += 2*src_wrap;
1048         src_p2 += 2*src_wrap;
1049     }
1050     /* do last line */
1051     deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
1052     av_free(buf);
1053 }
1054
1055
1056 /* deinterlace - if not supported return -1 */
1057 int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
1058                           int pix_fmt, int width, int height)
1059 {
1060     int i;
1061
1062     if (pix_fmt != PIX_FMT_YUV420P &&
1063         pix_fmt != PIX_FMT_YUV422P &&
1064         pix_fmt != PIX_FMT_YUV444P)
1065         return -1;
1066     if ((width & 3) != 0 || (height & 3) != 0)
1067         return -1;
1068
1069 #ifdef HAVE_MMX
1070     {
1071         mmx_t rounder;
1072         rounder.uw[0]=4;
1073         rounder.uw[1]=4;
1074         rounder.uw[2]=4;
1075         rounder.uw[3]=4;
1076         pxor_r2r(mm7,mm7);
1077         movq_m2r(rounder,mm6);
1078     }
1079 #endif
1080
1081
1082     for(i=0;i<3;i++) {
1083         if (i == 1) {
1084             switch(pix_fmt) {
1085             case PIX_FMT_YUV420P:
1086                 width >>= 1;
1087                 height >>= 1;
1088                 break;
1089             case PIX_FMT_YUV422P:
1090                 width >>= 1;
1091                 break;
1092             default:
1093                 break;
1094             }
1095         }
1096         if (src == dst) {
1097             deinterlace_bottom_field_inplace(src->data[i], src->linesize[i],
1098                                  width, height);
1099         } else {
1100             deinterlace_bottom_field(dst->data[i],dst->linesize[i],
1101                                         src->data[i], src->linesize[i],
1102                                         width, height);
1103         }
1104     }
1105 #ifdef HAVE_MMX
1106     emms();
1107 #endif
1108     return 0;
1109 }
1110
1111 #undef FIX