/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include "avcodec.h"
#include "fastmemcpy.h"
#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST     1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)
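/* Worked example of the fixed point layout: source positions are kept in
   16.16 fixed point, so a position of 2.75 is stored as
   2.75 * POS_FRAC = 0x2C000. "pos >> POS_FRAC_BITS" recovers the integer
   pixel 2, and the top PHASE_BITS of the fractional part select one of
   the NB_PHASES filter phases. */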
struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int h_incr, v_incr;
    INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    UINT8 *line_buf;
};
static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}
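/* Worked example: with POS_FRAC_BITS = 16 and PHASE_BITS = 4,
   pos = 0x2C000 (i.e. 2.75) gives get_phase(pos) == 12, the phase
   closest to the fractional offset 0.75 = 12/16. */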
/* This function must be optimized */
static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                            int src_start, int src_incr, INT16 *filters)
{
    int src_pos, phase, sum, i;
    UINT8 *s;
    INT16 *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0;
        else if (sum > 255) sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}
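/* Note that the fast path reads NB_TAPS source bytes unconditionally, so
   it is only safe while 0 <= (src_pos >> POS_FRAC_BITS) <= src_width -
   NB_TAPS; positions outside that window are handled by h_resample_slow()
   below. */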
/* This function must be optimized */
static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
                       INT16 *filter)
{
    int sum, i;
    UINT8 *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            UINT8 *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0;
        else if (sum > 255) sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}
#ifdef HAVE_MMX

#include "i386/mmx.h"

#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(s[0], reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}
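/* FILTER4 computes one output pixel: the source bytes are widened to 16
   bits, pmaddwd forms the two 32-bit partial sums s[0]*f[0]+s[1]*f[1]
   and s[2]*f[2]+s[3]*f[3], the psrlq/paddd pair folds them together into
   the low dword, and psrad applies the >> FILTER_BITS scaling. */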
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                                 int src_start, int src_incr, INT16 *filters)
{
    int src_pos, phase;
    UINT8 *s;
    INT16 *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {
        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}
static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
                            INT16 *filter)
{
    int sum, i, v;
    UINT8 *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(UINT32 *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0;
        else if (sum > 255) sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}

#endif /* HAVE_MMX */
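/* v_resample4_mmx() uses pmullw, which keeps only the low 16 bits of each
   coefficient * pixel product, so it is less precise than the 32-bit
   scalar accumulation; this is presumably why component_resample() below
   leaves the MMX vertical path disabled ("loss of precision"). */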
#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
                          INT16 *filter)
{
    int sum, i;
    UINT8 *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;

    s = src;
    for(i=0;i<4;i++) {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);
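    /* Worked example of the pre-shift: with FILTER_BITS = 8, a tap of 192
       applied to a sample of 100 should give (192 * 100) >> 8 = 75; after
       the << 7, vec_madds computes ((192 << 7) * 100) >> 15 = 75 as well. */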
    /*
       When we're resampling, we'd ideally like both our input and output
       buffers to be 16-byte aligned, so we can do both aligned reads and
       writes. Sadly we can't always have this at the moment, so we opt
       for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while (i > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0; else if (sum > 255) sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }
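    /* Worked example: if dst == 0x1009, then (-(int)dst) & 0xf == 7, so 7
       pixels are produced by the scalar code above and the vector loop
       below starts writing at the 16-byte boundary 0x1010. */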
    /* Do our altivec resampling on 16 pixels at once. */
    while (dst_width >= 16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst += 16;
        s += 16;
        dst_width -= 16;
    }
    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0; else if (sum > 255) sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif /* HAVE_ALTIVEC */
/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                            int src_start, int src_incr, INT16 *filters)
{
    int src_pos, phase, sum, j, v, i;
    UINT8 *s, *src_end;
    INT16 *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0;
        else if (sum > 255) sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}
static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                       int src_start, int src_incr, INT16 *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}
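/* h_resample() above splits the output row into up to three segments: a
   slow head while src_start < 0 (n = ceil(-src_start / src_incr) pixels),
   a fast middle where all NB_TAPS source pixels are in range, and a slow
   tail once the filter window would run past the right edge. */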
static void component_resample(ImgReSampleContext *s,
                               UINT8 *output, int owrap, int owidth, int oheight,
                               UINT8 *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    UINT8 *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions: replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX version deactivated because of its loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;
        output += owrap;
    }
}
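/* The line buffer holds LINE_BUF_HEIGHT + NB_TAPS horizontally resampled
   lines; lines written into the last NB_TAPS slots are mirrored into the
   first NB_TAPS slots (the memcpy above), so the vertical filter can
   always read NB_TAPS consecutive lines starting at ring_y - NB_TAPS + 1
   without wrapping in mid-filter. */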
/* XXX: the following filter is quite naive, but it seems to suffice
   for 4 taps */
static void build_filter(INT16 *filter, float factor)
{
    int ph, i, v;
    float x, y, tab[NB_TAPS], norm, mult;

    /* if upsampling, only need to interpolate, no filter */
    if (factor > 1.0)
        factor = 1.0;

    for(ph=0;ph<NB_PHASES;ph++) {
        norm = 0;
        for(i=0;i<NB_TAPS;i++) {
            /* sinc filter, truncated to NB_TAPS samples */
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
            if (x == 0)
                y = 1.0;
            else
                y = sin(x) / x;
            tab[i] = y;
            norm += y;
        }

        /* normalize so that a uniform color remains the same */
        mult = (float)(1 << FILTER_BITS) / norm;
        for(i=0;i<NB_TAPS;i++) {
            v = (int)(tab[i] * mult);
            filter[ph * NB_TAPS + i] = v;
        }
    }
}
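/* Worked example: with factor = 1.0, NB_TAPS = 4 and FILTER_BITS = 8,
   phase 0 degenerates to the identity {0, 256, 0, 0}, and the half-pel
   phase ph = NB_PHASES/2 gives {-64, 192, 192, -64}; each phase sums to
   ~(1 << FILTER_BITS), so flat areas keep their value. */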
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
}
ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                           int iwidth, int iheight,
                                           int topBand, int bottomBand,
                                           int leftBand, int rightBand)
{
    ImgReSampleContext *s;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;
    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
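    /* Worked example: shrinking a 704-pixel-wide line (no bands) to 352
       pixels gives h_incr = (704 * POS_FRAC) / 352 = 2 << POS_FRAC_BITS,
       i.e. the source position advances 2.0 pixels per output pixel. */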
    build_filter(&s->h_filters[0][0], (float) owidth /
                 (float) (iwidth - leftBand - rightBand));
    build_filter(&s->v_filters[0][0], (float) oheight /
                 (float) (iheight - topBand - bottomBand));

    return s;
fail:
    av_free(s);
    return NULL;
}
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, AVPicture *input)
{
    int i, shift;

    for(i=0;i<NB_COMPONENTS;i++) {
        shift = (i == 0) ? 0 : 1;
        component_resample(s, output->data[i], output->linesize[i],
                           s->owidth >> shift, s->oheight >> shift,
                           input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}
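/* Plane 0 is luma at full resolution; planes 1 and 2 use shift = 1,
   which assumes 4:2:0-style chroma subsampling (half width and height)
   in the AVPicture data. */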
void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}
#ifdef TEST

void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    memset(ptr, 0, size);
    return ptr;
}
void av_free(void *ptr)
{
    /* XXX: this test should not be needed on most libcs */
    if (ptr)
        free(ptr);
}
#define XSIZE 256
#define YSIZE 256
UINT8 img[XSIZE * YSIZE];

#define XSIZE1 512
#define YSIZE1 512
UINT8 img1[XSIZE1 * YSIZE1];
UINT8 img2[XSIZE1 * YSIZE1];
void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize)
{
    FILE *f;
    f = fopen(filename, "w");
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
}
static void dump_filter(INT16 *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        printf("%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        printf("\n");
    }
}
int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0x00;
                } else {
                    if (((x+3) % 4) <= 1 &&
                        ((y+3) % 4) <= 1)
                        v = 0xff;
                    else
                        v = 0x00;
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0);
        printf("Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        sprintf(buf, "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }
    /* mmx test */
#ifdef HAVE_MMX
    printf("MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    img_resample_close(s);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    img_resample_close(s);

    if (memcmp(img1, img2, xsize * ysize) != 0) {
        fprintf(stderr, "mmx error\n");
        exit(1);
    }
    printf("MMX OK\n");
#endif
    return 0;
}

#endif /* TEST */