r419: various fixes, memory allocations, etc.
[cinelerra_cv/mob.git] / mpeg2enc / quantize.c
blob9c79c16715905700efa3f3b36460e5b0a9891624
1 /* quantize.c, quantization / inverse quantization */
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
5 /*
6 * Disclaimer of Warranty
8 * These software programs are available to the user without any license fee or
9 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
10 * any and all warranties, whether express, implied, or statuary, including any
11 * implied warranties or merchantability or of fitness for a particular
12 * purpose. In no event shall the copyright-holder be liable for any
13 * incidental, punitive, or consequential damages of any kind whatsoever
14 * arising from the use of these programs.
16 * This disclaimer of warranty extends to the user of these programs and user's
17 * customers, employees, agents, transferees, successors, and assigns.
19 * The MPEG Software Simulation Group does not represent or warrant that the
20 * programs furnished hereunder are free of infringement of any third-party
21 * patents.
23 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24 * are subject to royalty fees to patent holders. Many of these patents are
25 * general enough such that they are unavoidable regardless of implementation
26 * design.
30 #include "config.h"
31 #include <stdio.h>
32 #include <math.h>
33 #include <fenv.h>
34 #include "global.h"
35 #include "cpu_accel.h"
36 #include "simd.h"
37 #include "fastintfns.h"
40 /* Global function pointers for SIMD-dependent functions */
41 int (*pquant_non_intra)(pict_data_s *picture, int16_t *src, int16_t *dst,
42 int mquant, int *nonsat_mquant);
43 int (*pquant_weight_coeff_sum)(int16_t *blk, uint16_t*i_quant_mat );
45 /* Local functions pointers for SIMD-dependent functions */
47 static void (*piquant_non_intra_m1)(int16_t *src, int16_t *dst, uint16_t *quant_mat);
50 static int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat );
51 static void iquant_non_intra_m1(int16_t *src, int16_t *dst, uint16_t *quant_mat);
55 Initialise quantization routines.
56 Currently just setting up MMX routines if available...
59 void init_quantizer()
61 int flags;
62 flags = cpu_accel();
63 #ifdef X86_CPU
64 if( (flags & ACCEL_X86_MMX) != 0 ) /* MMX CPU */
66 if(verbose) fprintf( stderr, "SETTING " );
67 if( (flags & ACCEL_X86_3DNOW) != 0 )
69 if(verbose) fprintf( stderr, "3DNOW and ");
70 pquant_non_intra = quant_non_intra_3dnow;
72 else if ( (flags & ACCEL_X86_MMXEXT) != 0 )
74 if(verbose) fprintf( stderr, "SSE and ");
75 pquant_non_intra = quant_non_intra_sse;
77 else
79 pquant_non_intra = quant_non_intra;
82 if ( (flags & ACCEL_X86_MMXEXT) != 0 )
84 if(verbose) fprintf( stderr, "EXTENDED MMX");
85 pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
86 piquant_non_intra_m1 = iquant_non_intra_m1_sse;
88 else
90 if(verbose) fprintf( stderr, "MMX");
91 pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
92 piquant_non_intra_m1 = iquant_non_intra_m1_mmx;
94 if(verbose) fprintf( stderr, " for QUANTIZER!\n");
96 else
97 #endif
99 pquant_non_intra = quant_non_intra;
100 pquant_weight_coeff_sum = quant_weight_coeff_sum;
101 piquant_non_intra_m1 = iquant_non_intra_m1;
107 * Computes the next quantisation up. Used to avoid saturation
108 * in macroblock coefficients - common in MPEG-1 - which causes
109 * nasty artefacts.
111 * NOTE: Does no range checking...
116 int next_larger_quant( pict_data_s *picture, int quant )
118 if( picture->q_scale_type )
120 if( map_non_linear_mquant[quant]+1 > 31 )
121 return quant;
122 else
123 return non_linear_mquant_table[map_non_linear_mquant[quant]+1];
125 else
127 if( quant+2 > 31 )
128 return quant;
129 else
130 return quant+2;
136 * Quantisation for intra blocks using Test Model 5 quantization
138 * this quantizer has a bias of 1/8 stepsize towards zero
139 * (except for the DC coefficient)
141 PRECONDITION: src dst point to *disinct* memory buffers...
142 of block_count *adjact* int16_t[64] arrays...
144 * RETURN: 1 If non-zero coefficients left after quantisaiont 0 otherwise
147 void quant_intra(
148 pict_data_s *picture,
149 int16_t *src,
150 int16_t *dst,
151 int mquant,
152 int *nonsat_mquant
155 int16_t *psrc,*pbuf;
156 int i,comp;
157 int x, y, d;
158 int clipping;
159 int clipvalue = dctsatlim;
160 uint16_t *quant_mat = intra_q_tbl[mquant] /* intra_q */;
163 /* Inspired by suggestion by Juan. Quantize a little harder if we clip...
168 clipping = 0;
169 pbuf = dst;
170 psrc = src;
171 for( comp = 0; comp<block_count && !clipping; ++comp )
173 x = psrc[0];
174 d = 8>>picture->dc_prec; /* intra_dc_mult */
175 pbuf[0] = (x>=0) ? (x+(d>>1))/d : -((-x+(d>>1))/d); /* round(x/d) */
178 for (i=1; i<64 ; i++)
180 x = psrc[i];
181 d = quant_mat[i];
182 /* RJ: save one divide operation */
183 y = ((abs(x) << 5)+ ((3 * quant_mat[i]) >> 2)) / (quant_mat[i] << 1);
184 if ( y > clipvalue )
186 clipping = 1;
187 mquant = next_larger_quant( picture, mquant );
188 quant_mat = intra_q_tbl[mquant];
189 break;
192 pbuf[i] = intsamesign(x,y);
194 pbuf += 64;
195 psrc += 64;
198 } while( clipping );
199 *nonsat_mquant = mquant;
/*
 * Quantisation matrix weighted coefficient sum: fixed-point
 * integer with low 16 bits fractional...
 * To be used for rate control as a measure of DCT block
 * complexity...
 *
 * blk          the 64 coefficients of one block
 * i_quant_mat  the 64 per-coefficient weights
 *
 * RETURN: the sum over all 64 positions of |blk[k]| * i_quant_mat[k].
 *
 * In case you're wondering: typical average coeff_sums for a rather
 * noisy video are around 20.0.
 */
int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat )
{
    int total = 0;
    int k;

    for (k = 0; k < 64; ++k)
    {
        total += abs((int)blk[k]) * i_quant_mat[k];
    }
    return total;
}
227 * Quantisation for non-intra blocks using Test Model 5 quantization
229 * this quantizer has a bias of 1/8 stepsize towards zero
230 * (except for the DC coefficient)
232 * A.Stevens 2000: The above comment is nonsense. Only the intra quantiser does
233 * this. This one just truncates with a modest bias of 1/(4*quant_matrix_scale)
234 * to 1.
236 * PRECONDITION: src dst point to *disinct* memory buffers...
237 * of block_count *adjacent* int16_t[64] arrays...
239 * RETURN: A bit-mask of block_count bits indicating non-zero blocks (a 1).
241 * TODO: A candidate for use of efficient abs and "intsamesign". If only gcc understood
242 * PPro conditional moves...
245 int quant_non_intra(
246 pict_data_s *picture,
247 int16_t *src, int16_t *dst,
248 int mquant,
249 int *nonsat_mquant)
251 int i;
252 int x, y, d;
253 int nzflag;
254 int coeff_count;
255 int clipvalue = dctsatlim;
256 int flags = 0;
257 int saturated = 0;
258 uint16_t *quant_mat = inter_q_tbl[mquant]/* inter_q */;
260 coeff_count = 64*block_count;
261 flags = 0;
262 nzflag = 0;
263 for (i=0; i<coeff_count; ++i)
265 restart:
266 if( (i%64) == 0 )
268 nzflag = (nzflag<<1) | !!flags;
269 flags = 0;
272 /* RJ: save one divide operation */
274 x = abs( ((int)src[i]) ) /*(src[i] >= 0 ? src[i] : -src[i])*/ ;
275 d = (int)quant_mat[(i&63)];
276 /* A.Stevens 2000: Given the math of non-intra frame
277 quantisation / inverse quantisation I always though the
278 funny little foudning factor was bogus. It seems to be
279 the encoder needs less bits if you simply divide!
282 y = (x<<4) / (d) /* (32*x + (d>>1))/(d*2*mquant)*/ ;
283 if ( y > clipvalue )
285 if( saturated )
287 y = clipvalue;
289 else
291 int new_mquant = next_larger_quant( picture, mquant );
292 if( new_mquant != mquant )
294 mquant = new_mquant;
295 quant_mat = inter_q_tbl[mquant];
297 else
299 saturated = 1;
301 i=0;
302 nzflag =0;
303 goto restart;
306 dst[i] = intsamesign(src[i], y) /* (src[i] >= 0 ? y : -y) */;
307 flags |= dst[i];
309 nzflag = (nzflag<<1) | !!flags;
311 *nonsat_mquant = mquant;
312 return nzflag;
315 /* MPEG-1 inverse quantization */
316 static void iquant1_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
318 int i, val;
319 uint16_t *quant_mat = intra_q;
321 dst[0] = src[0] << (3-dc_prec);
322 for (i=1; i<64; i++)
324 val = (int)(src[i]*quant_mat[i]*mquant)/16;
326 /* mismatch control */
327 if ((val&1)==0 && val!=0)
328 val+= (val>0) ? -1 : 1;
330 /* saturation */
331 dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
336 /* MPEG-2 inverse quantization */
337 void iquant_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
339 int i, val, sum;
341 if ( mpeg1 )
342 iquant1_intra(src,dst,dc_prec, mquant);
343 else
345 sum = dst[0] = src[0] << (3-dc_prec);
346 for (i=1; i<64; i++)
348 val = (int)(src[i]*intra_q[i]*mquant)/16;
349 sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
352 /* mismatch control */
353 if ((sum&1)==0)
354 dst[63]^= 1;
/*
 * Plain C MPEG-1 inverse quantization of one non-intra block.
 *
 * src       64 quantised coefficients
 * dst       receives the 64 reconstructed coefficients
 * quant_mat 64 per-coefficient multipliers
 *
 * A non-zero coefficient v is reconstructed as (2v +/- 1) * quant_mat[i] / 32
 * (the 1 carrying the sign of v), forced odd by moving even non-zero
 * results one step towards zero (mismatch control), and then saturated.
 * Zero coefficients stay zero.
 */
static void iquant_non_intra_m1(int16_t *src, int16_t *dst, uint16_t *quant_mat)
{
    int i, val;

#ifndef ORIGINAL_CODE

    for (i = 0; i < 64; i++)
    {
        val = src[i];
        if (val > 0)
        {
            val = ((2 * val + 1) * quant_mat[i]) / 32;
            /* mismatch control: even, non-zero -> one step towards zero */
            if (val != 0 && (val & 1) == 0)
                val -= 1;
        }
        else if (val < 0)
        {
            val = ((2 * val - 1) * quant_mat[i]) / 32;
            /* mismatch control: even, non-zero -> one step towards zero */
            if (val != 0 && (val & 1) == 0)
                val += 1;
        }

        /* saturation */
        if (val > 2047)
            val = 2047;
        else if (val < -2048)
            val = -2048;
        dst[i] = val;
    }

#else

    for (i = 0; i < 64; i++)
    {
        /* Work on the magnitude and restore the sign at the end. */
        val = abs(src[i]);
        if (val != 0)
        {
            val = ((2 * val + 1) * quant_mat[i]) >> 5;
            /* mismatch control */
            if ((val & 1) == 0 && val != 0)
                val -= 1;
            val = fastmin(val, 2047); /* Saturation */
        }
        dst[i] = intsamesign(src[i], val);
    }

#endif
}
402 void iquant_non_intra(int16_t *src, int16_t *dst, int mquant )
404 int i, val, sum;
405 uint16_t *quant_mat;
407 if ( mpeg1 )
408 (*piquant_non_intra_m1)(src,dst,inter_q_tbl[mquant]);
409 else
411 sum = 0;
412 #ifdef ORIGINAL_CODE
414 for (i=0; i<64; i++)
416 val = src[i];
417 if (val!=0)
419 val = (int)((2*val+(val>0 ? 1 : -1))*inter_q[i]*mquant)/32;
420 sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
422 #else
423 quant_mat = inter_q_tbl[mquant];
424 for (i=0; i<64; i++)
426 val = src[i];
427 if( val != 0 )
429 val = abs(val);
430 val = (int)((val+val+1)*quant_mat[i])>>5;
431 val = intmin( val, 2047);
432 sum += val;
434 dst[i] = intsamesign(src[i],val);
436 #endif
438 /* mismatch control */
439 if ((sum&1)==0)
440 dst[63]^= 1;
444 void iquantize( pict_data_s *picture )
446 int j,k;
447 int16_t (*qblocks)[64] = picture->qblocks;
448 for (k=0; k<mb_per_pict; k++)
450 if (picture->mbinfo[k].mb_type & MB_INTRA)
451 for (j=0; j<block_count; j++)
452 iquant_intra(qblocks[k*block_count+j],
453 qblocks[k*block_count+j],
454 cur_picture.dc_prec,
455 cur_picture.mbinfo[k].mquant);
456 else
457 for (j=0;j<block_count;j++)
458 iquant_non_intra(qblocks[k*block_count+j],
459 qblocks[k*block_count+j],
460 cur_picture.mbinfo[k].mquant);