mpeg2enc/quantize.c

   1 /* quantize.c, quantization / inverse quantization                          */
   2
   3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
   4
   5 /*
   6  * Disclaimer of Warranty
   7  *
   8  * These software programs are available to the user without any license fee or
   9  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
  10  * any and all warranties, whether express, implied, or statuary, including any
  11  * implied warranties or merchantability or of fitness for a particular
  12  * purpose.  In no event shall the copyright-holder be liable for any
  13  * incidental, punitive, or consequential damages of any kind whatsoever
  14  * arising from the use of these programs.
  15  *
  16  * This disclaimer of warranty extends to the user of these programs and user's
  17  * customers, employees, agents, transferees, successors, and assigns.
  18  *
  19  * The MPEG Software Simulation Group does not represent or warrant that the
  20  * programs furnished hereunder are free of infringement of any third-party
  21  * patents.
  22  *
  23  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
  24  * are subject to royalty fees to patent holders.  Many of these patents are
  25  * general enough such that they are unavoidable regardless of implementation
  26  * design.
  27  *
  28  */
  29
  30 #include "config.h"
  31 #include <stdio.h>
  32 #include <math.h>
  33 #include <fenv.h>
  34 #include "global.h"
  35 #include "cpu_accel.h"
  36 #include "simd.h"
  37 #include "fastintfns.h"
  38
  39
  40 /* Global function pointers for SIMD-dependent functions */
  41 int (*pquant_non_intra)(pict_data_s *picture, int16_t *src, int16_t *dst,
  42                                                 int mquant, int *nonsat_mquant);
  43 int (*pquant_weight_coeff_sum)(int16_t *blk, uint16_t*i_quant_mat );
  44
  45 /* Local functions pointers for SIMD-dependent functions */
  46
  47 static void (*piquant_non_intra_m1)(int16_t *src, int16_t *dst,  uint16_t *quant_mat);
  48
  49
  50 static int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat );
  51 static void iquant_non_intra_m1(int16_t *src, int16_t *dst, uint16_t *quant_mat);
  52
  53
  54 /*
  55   Initialise quantization routines.
  56   Currently just setting up MMX routines if available...
  57  */
  58
  59 void init_quantizer()
  60 {
  61   int flags;
  62   flags = cpu_accel();
  63 #ifdef X86_CPU
  64   if( (flags & ACCEL_X86_MMX) != 0 ) /* MMX CPU */
  65         {
  66                 if(verbose) fprintf( stderr, "SETTING " );
  67                 if( (flags & ACCEL_X86_3DNOW) != 0 )
  68                 {
  69                         if(verbose) fprintf( stderr, "3DNOW and ");
  70                         pquant_non_intra = quant_non_intra_3dnow;
  71                 }
  72                 else if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  73                 {
  74                         if(verbose) fprintf( stderr, "SSE and ");
  75                         pquant_non_intra = quant_non_intra_sse;
  76                 }
  77                 else
  78                 {
  79                         pquant_non_intra = quant_non_intra;
  80                 }
  81
  82                 if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  83                 {
  84                         if(verbose) fprintf( stderr, "EXTENDED MMX");
  85                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  86                         piquant_non_intra_m1 = iquant_non_intra_m1_sse;
  87                 }
  88                 else
  89                 {
  90                         if(verbose) fprintf( stderr, "MMX");
  91                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  92                         piquant_non_intra_m1 = iquant_non_intra_m1_mmx;
  93                 }
  94                 if(verbose) fprintf( stderr, " for QUANTIZER!\n");
  95         }
  96   else
  97 #endif
  98         {
  99           pquant_non_intra = quant_non_intra;
 100           pquant_weight_coeff_sum = quant_weight_coeff_sum;
 101           piquant_non_intra_m1 = iquant_non_intra_m1;
 102         }
 103 }
 104
 105 /*
 106  *
 107  * Computes the next quantisation up.  Used to avoid saturation
 108  * in macroblock coefficients - common in MPEG-1 - which causes
 109  * nasty artefacts.
 110  *
 111  * NOTE: Does no range checking...
 112  *
 113  */
 114
 115
 116 int next_larger_quant( pict_data_s *picture, int quant )
 117 {
 118         if( picture->q_scale_type )
 119                 {
 120                         if( map_non_linear_mquant[quant]+1 > 31 )
 121                                 return quant;
 122                         else
 123                                 return non_linear_mquant_table[map_non_linear_mquant[quant]+1];
 124                 }
 125         else
 126                 {
 127                         if( quant+2 > 31 )
 128                                 return quant;
 129                         else
 130                                 return quant+2;
 131                 }
 132
 133 }
 134
 135 /*
 136  * Quantisation for intra blocks using Test Model 5 quantization
 137  *
 138  * this quantizer has a bias of 1/8 stepsize towards zero
 139  * (except for the DC coefficient)
 140  *
 141         PRECONDITION: src dst point to *disinct* memory buffers...
 142                           of block_count *adjact* int16_t[64] arrays...
 143  *
 144  * RETURN: 1 If non-zero coefficients left after quantisaiont 0 otherwise
 145  */
 146
 147 void quant_intra(
 148         pict_data_s *picture,
 149         int16_t *src,
 150         int16_t *dst,
 151         int mquant,
 152         int *nonsat_mquant
 153         )
 154 {
 155   int16_t *psrc,*pbuf;
 156   int i,comp;
 157   int x, y, d;
 158   int clipping;
 159   int clipvalue  = dctsatlim;
 160   uint16_t *quant_mat = intra_q_tbl[mquant] /* intra_q */;
 161
 162
 163   /* Inspired by suggestion by Juan.  Quantize a little harder if we clip...
 164    */
 165
 166   do
 167         {
 168           clipping = 0;
 169           pbuf = dst;
 170           psrc = src;
 171           for( comp = 0; comp<block_count && !clipping; ++comp )
 172           {
 173                 x = psrc[0];
 174                 d = 8>>picture->dc_prec; /* intra_dc_mult */
 175                 pbuf[0] = (x>=0) ? (x+(d>>1))/d : -((-x+(d>>1))/d); /* round(x/d) */
 176
 177
 178                 for (i=1; i<64 ; i++)
 179                   {
 180                         x = psrc[i];
 181                         d = quant_mat[i];
 182                         /* RJ: save one divide operation */
 183                         y = ((abs(x) << 5)+ ((3 * quant_mat[i]) >> 2)) / (quant_mat[i] << 1);
 184                         if ( y > clipvalue )
 185                           {
 186                                 clipping = 1;
 187                                 mquant = next_larger_quant( picture, mquant );
 188                                 quant_mat = intra_q_tbl[mquant];
 189                                 break;
 190                           }
 191
 192                         pbuf[i] = intsamesign(x,y);
 193                   }
 194                 pbuf += 64;
 195                 psrc += 64;
 196           }
 197
 198         } while( clipping );
 199   *nonsat_mquant = mquant;
 200 }
 201
 202
 203 /*
 204  * Quantisation matrix weighted Coefficient sum fixed-point
 205  * integer with low 16 bits fractional...
 206  * To be used for rate control as a measure of dct block
 207  * complexity...
 208  *
 209  */
 210
 211 int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat )
 212 {
 213   int i;
 214   int sum = 0;
 215    for( i = 0; i < 64; i+=2 )
 216         {
 217                 sum += abs((int)blk[i]) * (i_quant_mat[i]) + abs((int)blk[i+1]) * (i_quant_mat[i+1]);
 218         }
 219     return sum;
 220   /* In case you're wondering typical average coeff_sum's for a rather
 221         noisy video are around 20.0.  */
 222 }
 223
 224
 225
 226 /*
 227  * Quantisation for non-intra blocks using Test Model 5 quantization
 228  *
 229  * this quantizer has a bias of 1/8 stepsize towards zero
 230  * (except for the DC coefficient)
 231  *
 232  * A.Stevens 2000: The above comment is nonsense.  Only the intra quantiser does
 233  * this.  This one just truncates with a modest bias of 1/(4*quant_matrix_scale)
 234  * to 1.
 235  *
 236  *      PRECONDITION: src dst point to *disinct* memory buffers...
 237  *                    of block_count *adjacent* int16_t[64] arrays...
 238  *
 239  * RETURN: A bit-mask of block_count bits indicating non-zero blocks (a 1).
 240  *
 241  * TODO: A candidate for use of efficient abs and "intsamesign". If only gcc understood
 242  * PPro conditional moves...
 243  */
 244
 245 int quant_non_intra(
 246                                                    pict_data_s *picture,
 247                                                    int16_t *src, int16_t *dst,
 248                                                    int mquant,
 249                                                    int *nonsat_mquant)
 250 {
 251         int i;
 252         int x, y, d;
 253         int nzflag;
 254         int coeff_count;
 255         int clipvalue  = dctsatlim;
 256         int flags = 0;
 257         int saturated = 0;
 258         uint16_t *quant_mat = inter_q_tbl[mquant]/* inter_q */;
 259
 260         coeff_count = 64*block_count;
 261         flags = 0;
 262         nzflag = 0;
 263         for (i=0; i<coeff_count; ++i)
 264         {
 265 restart:
 266                 if( (i%64) == 0 )
 267                 {
 268                         nzflag = (nzflag<<1) | !!flags;
 269                         flags = 0;
 270
 271                 }
 272                 /* RJ: save one divide operation */
 273
 274                 x = abs( ((int)src[i]) ) /*(src[i] >= 0 ? src[i] : -src[i])*/ ;
 275                 d = (int)quant_mat[(i&63)];
 276                 /* A.Stevens 2000: Given the math of non-intra frame
 277                    quantisation / inverse quantisation I always though the
 278                    funny little foudning factor was bogus.  It seems to be
 279                    the encoder needs less bits if you simply divide!
 280                 */
 281
 282                 y = (x<<4) /  (d) /* (32*x + (d>>1))/(d*2*mquant)*/ ;
 283                 if ( y > clipvalue )
 284                 {
 285                         if( saturated )
 286                         {
 287                                 y = clipvalue;
 288                         }
 289                         else
 290                         {
 291                                 int new_mquant = next_larger_quant( picture, mquant );
 292                                 if( new_mquant != mquant )
 293                                 {
 294                                         mquant = new_mquant;
 295                                         quant_mat = inter_q_tbl[mquant];
 296                                 }
 297                                 else
 298                                 {
 299                                         saturated = 1;
 300                                 }
 301                                 i=0;
 302                                 nzflag =0;
 303                                 goto restart;
 304                         }
 305                 }
 306                 dst[i] = intsamesign(src[i], y) /* (src[i] >= 0 ? y : -y) */;
 307                 flags |= dst[i];
 308         }
 309         nzflag = (nzflag<<1) | !!flags;
 310
 311     *nonsat_mquant = mquant;
 312     return nzflag;
 313 }
 314
 315 /* MPEG-1 inverse quantization */
 316 static void iquant1_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 317 {
 318   int i, val;
 319   uint16_t *quant_mat = intra_q;
 320
 321   dst[0] = src[0] << (3-dc_prec);
 322   for (i=1; i<64; i++)
 323   {
 324     val = (int)(src[i]*quant_mat[i]*mquant)/16;
 325
 326     /* mismatch control */
 327     if ((val&1)==0 && val!=0)
 328       val+= (val>0) ? -1 : 1;
 329
 330     /* saturation */
 331     dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 332   }
 333 }
 334
 335
 336 /* MPEG-2 inverse quantization */
 337 void iquant_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 338 {
 339   int i, val, sum;
 340
 341   if ( mpeg1  )
 342     iquant1_intra(src,dst,dc_prec, mquant);
 343   else
 344   {
 345     sum = dst[0] = src[0] << (3-dc_prec);
 346     for (i=1; i<64; i++)
 347     {
 348       val = (int)(src[i]*intra_q[i]*mquant)/16;
 349       sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 350     }
 351
 352     /* mismatch control */
 353     if ((sum&1)==0)
 354       dst[63]^= 1;
 355   }
 356 }
 357
 358
 359 static void iquant_non_intra_m1(int16_t *src, int16_t *dst,  uint16_t *quant_mat)
 360 {
 361   int i, val;
 362
 363 #ifndef ORIGINAL_CODE
 364
 365   for (i=0; i<64; i++)
 366   {
 367     val = src[i];
 368     if (val!=0)
 369     {
 370       val = (int)((2*val+(val>0 ? 1 : -1))*quant_mat[i])/32;
 371
 372       /* mismatch control */
 373       if ((val&1)==0 && val!=0)
 374         val+= (val>0) ? -1 : 1;
 375     }
 376
 377     /* saturation */
 378      dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 379  }
 380 #else
 381
 382   for (i=0; i<64; i++)
 383   {
 384     val = abs(src[i]);
 385     if (val!=0)
 386     {
 387                 val = ((val+val+1)*quant_mat[i]) >> 5;
 388                 /* mismatch control */
 389                 val -= (~(val&1))&(val!=0);
 390                 val = fastmin(val, 2047); /* Saturation */
 391     }
 392         dst[i] = intsamesign(src[i],val);
 393
 394   }
 395
 396 #endif
 397 }
 398
 399
 400
 401
 402 void iquant_non_intra(int16_t *src, int16_t *dst, int mquant )
 403 {
 404   int i, val, sum;
 405   uint16_t *quant_mat;
 406
 407   if ( mpeg1 )
 408     (*piquant_non_intra_m1)(src,dst,inter_q_tbl[mquant]);
 409   else
 410   {
 411           sum = 0;
 412 #ifdef ORIGINAL_CODE
 413
 414           for (i=0; i<64; i++)
 415           {
 416                   val = src[i];
 417                   if (val!=0)
 418
 419                           val = (int)((2*val+(val>0 ? 1 : -1))*inter_q[i]*mquant)/32;
 420                   sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 421           }
 422 #else
 423           quant_mat = inter_q_tbl[mquant];
 424           for (i=0; i<64; i++)
 425           {
 426                   val = src[i];
 427                   if( val != 0 )
 428                   {
 429                           val = abs(val);
 430                           val = (int)((val+val+1)*quant_mat[i])>>5;
 431                           val = intmin( val, 2047);
 432                           sum += val;
 433                   }
 434                   dst[i] = intsamesign(src[i],val);
 435           }
 436 #endif
 437
 438     /* mismatch control */
 439     if ((sum&1)==0)
 440       dst[63]^= 1;
 441   }
 442 }
 443
 444 void iquantize( pict_data_s *picture )
 445 {
 446         int j,k;
 447         int16_t (*qblocks)[64] = picture->qblocks;
 448         for (k=0; k<mb_per_pict; k++)
 449         {
 450                 if (picture->mbinfo[k].mb_type & MB_INTRA)
 451                         for (j=0; j<block_count; j++)
 452                                 iquant_intra(qblocks[k*block_count+j],
 453                                                          qblocks[k*block_count+j],
 454                                                          cur_picture.dc_prec,
 455                                                          cur_picture.mbinfo[k].mquant);
 456                 else
 457                         for (j=0;j<block_count;j++)
 458                                 iquant_non_intra(qblocks[k*block_count+j],
 459                                                                  qblocks[k*block_count+j],
 460                                                                  cur_picture.mbinfo[k].mquant);
 461         }
 462 }