applied AkhIL's cinelerra-cv surround patch
[cinelerra_cv/ct.git] / mpeg2enc / ratectl.c
blob768384307e752f852a951b989a90b406cab72ff4
1 /* ratectl.c, bitrate control routines (linear quantization only currently) */
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
5 /*
6 * Disclaimer of Warranty
8 * These software programs are available to the user without any license fee or
9 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
10 * any and all warranties, whether express, implied, or statuary, including any
11 * implied warranties or merchantability or of fitness for a particular
12 * purpose. In no event shall the copyright-holder be liable for any
13 * incidental, punitive, or consequential damages of any kind whatsoever
14 * arising from the use of these programs.
16 * This disclaimer of warranty extends to the user of these programs and user's
17 * customers, employees, agents, transferees, successors, and assigns.
19 * The MPEG Software Simulation Group does not represent or warrant that the
20 * programs furnished hereunder are free of infringement of any third-party
21 * patents.
23 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24 * are subject to royalty fees to patent holders. Many of these patents are
25 * general enough such that they are unavoidable regardless of implementation
26 * design.
30 #include <stdio.h>
31 #include <limits.h>
32 #include <math.h>
33 #include <pthread.h>
35 #include "config.h"
36 #include "global.h"
37 #include "fastintfns.h"
39 /* rate control variables */
41 * static double R, T, d;
42 * static double actsum;
43 * static int Np, Nb;
44 * static double S, Q;
45 * static int prev_mquant;
46 * static double bitcnt_EOP;
47 * static double next_ip_delay; // due to frame reordering delay
48 * static double decoding_time;
49 * static int Xi, Xp, Xb, r, d0i, d0p, d0b;
50 * static double avg_act;
53 void ratectl_init_seq(ratectl_t *ratectl)
55 pthread_mutexattr_t mutex_attr;
56 pthread_mutexattr_init(&mutex_attr);
57 pthread_mutex_init(&(ratectl->ratectl_lock), &mutex_attr);
59 ratectl->avg_KI = 2.5; /* TODO: These values empirically determined */
60 ratectl->avg_KB = 10.0; /* for MPEG-1, may need tuning for MPEG-2 */
61 ratectl->avg_KP = 10.0;
63 ratectl->bits_per_mb = (double)bit_rate / (mb_per_pict);
64 /* reaction parameter (constant) decreased to increase response
65 rate as encoder is currently tending to under/over-shoot... in
66 rate TODO: Reaction parameter is *same* for every frame type
67 despite different weightings... */
69 if (ratectl->r == 0)
70 ratectl->r = (int)floor(2.0 * bit_rate / frame_rate + 0.5);
72 ratectl->Ki = 1.2; /* EXPERIMENT: ADJUST activities for I MB's */
73 ratectl->Kb = 1.4;
74 ratectl->Kp = 1.1;
76 /* average activity */
77 if (ratectl->avg_act == 0.0) ratectl->avg_act = 400.0;
79 /* remaining # of bits in GOP */
80 ratectl->R = 0;
81 ratectl->IR = 0;
83 /* Heuristic: In constant bit-rate streams we assume buffering
84 will allow us to pre-load some (probably small) fraction of the
85 buffers size worth of following data if earlier data was
86 undershot its bit-rate allocation
90 ratectl->CarryR = 0;
91 ratectl->CarryRLim = video_buffer_size / 3;
92 /* global complexity (Chi! not X!) measure of different frame types */
93 /* These are just some sort-of sensible initial values for start-up */
95 ratectl->Xi = 1500*mb_per_pict; /* Empirically derived values */
96 ratectl->Xp = 550*mb_per_pict;
97 ratectl->Xb = 170*mb_per_pict;
98 ratectl->d0i = -1; /* Force initial Quant prediction */
99 ratectl->d0pb = -1;
101 ratectl->current_quant = 1;
104 void ratectl_init_GOP(ratectl_t *ratectl, int np, int nb)
106 double per_gop_bits =
107 (double)(1 + np + nb) * (double)bit_rate / frame_rate;
109 /* A.Stevens Aug 2000: at last I've found the wretched
110 rate-control overshoot bug... Simply "topping up" R here means
111 that we can accumulate an indefinately large pool of bits
112 "saved" from previous low-activity frames. This is of
113 course nonsense.
115 In CBR we can only accumulate as much as our buffer allows, after that
116 the eventual system stream will have to be padded. The automatic padding
117 will make this calculation fairly reasonable but since that's based on
118 estimates we again impose our rough and ready heuristic that we can't
119 accumulate more than approximately half a video buffer full.
121 In VBR we actually do nothing different. Here the bitrate is
122 simply a ceiling rate which we expect to undershoot a lot as
123 our quantisation floor cuts in. We specify a great big buffer
124 and simply don't pad when we undershoot.
126 However, we don't want to carry over absurd undershoots as when it
127 does get hectic we'll breach our maximum.
129 TODO: For CBR we should do a proper video buffer model and use
130 it to make bit allocation decisions.
134 if( ratectl->R > 0 )
136 /* We replacing running estimate of undershoot with
137 *exact* value and use that for calculating how much we
138 may "carry over"
140 ratectl->gop_undershoot = intmin( video_buffer_size/2, (int)ratectl->R );
142 ratectl->R = ratectl->gop_undershoot + per_gop_bits;
144 else
146 /* Overshoots are easy - we have to make up the bits */
147 ratectl->R += per_gop_bits;
148 ratectl->gop_undershoot = 0;
150 ratectl->IR = ratectl->R;
151 ratectl->Np = fieldpic ? 2 * np + 1 : np;
152 ratectl->Nb = fieldpic ? 2 * nb : nb;
155 static int scale_quant(pict_data_s *picture, double quant )
157 int iquant;
159 if (picture->q_scale_type )
161 iquant = (int) floor(quant+0.5);
163 /* clip mquant to legal (linear) range */
164 if (iquant<1)
165 iquant = 1;
166 if (iquant>112)
167 iquant = 112;
169 iquant =
170 non_linear_mquant_table_hv[map_non_linear_mquant_hv[iquant]];
172 else
174 /* clip mquant to legal (linear) range */
175 iquant = (int)floor(quant+0.5);
176 if (iquant<2)
177 iquant = 2;
178 if (iquant>62)
179 iquant = 62;
180 iquant = (iquant/2)*2; // Must be *even*
182 return iquant;
188 /* compute variance of 8x8 block */
189 static double var_sblk(p, lx)
190 unsigned char *p;
191 int lx;
193 int j;
194 register unsigned int v, s, s2;
196 s = s2 = 0;
198 for (j=0; j<8; j++)
200 v = p[0]; s += v; s2 += v * v;
201 v = p[1]; s += v; s2 += v * v;
202 v = p[2]; s += v; s2 += v * v;
203 v = p[3]; s += v; s2 += v * v;
204 v = p[4]; s += v; s2 += v * v;
205 v = p[5]; s += v; s2 += v * v;
206 v = p[6]; s += v; s2 += v * v;
207 v = p[7]; s += v; s2 += v * v;
208 p += lx;
211 return (double)s2 / 64.0 - ((double)s / 64.0) * ((double)s / 64.0);
215 static double calc_actj(pict_data_s *picture)
217 int i,j,k,l;
218 double actj,sum;
219 uint16_t *i_q_mat;
220 int actsum;
221 sum = 0.0;
222 k = 0;
224 for (j=0; j<height2; j+=16)
225 for (i=0; i<width; i+=16)
227 /* A.Stevens Jul 2000 Luminance variance *has* to be a rotten measure
228 of how active a block in terms of bits needed to code a lossless DCT.
229 E.g. a half-white half-black block has a maximal variance but
230 pretty small DCT coefficients.
232 So.... we use the absolute sum of DCT coefficients as our
233 variance measure.
235 if( picture->mbinfo[k].mb_type & MB_INTRA )
237 i_q_mat = i_intra_q;
238 /* EXPERIMENT: See what happens if we compensate for
239 the wholly disproprotionate weight of the DC
240 coefficients. Shold produce more sensible results... */
241 actsum = -80*COEFFSUM_SCALE;
243 else
245 i_q_mat = i_inter_q;
246 actsum = 0;
249 /* It takes some bits to code even an entirely zero block...
250 It also makes a lot of calculations a lot better conditioned
251 if it can be guaranteed that activity is always distinctly
252 non-zero.
256 for( l = 0; l < 6; ++l )
257 actsum +=
258 (*pquant_weight_coeff_sum)
259 ( cur_picture.mbinfo[k].dctblocks[l], i_q_mat ) ;
260 actj = (double)actsum / (double)COEFFSUM_SCALE;
261 if( actj < 12.0 )
262 actj = 12.0;
264 picture->mbinfo[k].act = (double)actj;
265 sum += (double)actj;
266 ++k;
268 return sum;
271 /* Note: we need to substitute K for the 1.4 and 1.0 constants -- this can
272 be modified to fit image content */
274 /* Step 1: compute target bits for current picture being coded */
275 void ratectl_init_pict(ratectl_t *ratectl, pict_data_s *picture)
277 double avg_K;
278 double target_Q;
279 double current_Q;
280 double Si, Sp, Sb;
281 /* TODO: A.Stevens Nov 2000 - This modification needs testing visually.
283 Weird. The original code used the average activity of the
284 *previous* frame as the basis for quantisation calculations for
285 rather than the activity in the *current* frame. That *has* to
286 be a bad idea..., surely, here we try to be smarter by using the
287 current values and keeping track of how much of the frames
288 activitity has been covered as we go along.
290 We also guesstimate the relationship between (sum
291 of DCT coefficients) and actual quantisation weighted activty.
292 We use this to try to predict the activity of each frame.
295 ratectl->actsum = calc_actj(picture );
296 ratectl->avg_act = (double)ratectl->actsum/(double)(mb_per_pict);
297 ratectl->sum_avg_act += ratectl->avg_act;
298 ratectl->actcovered = 0.0;
300 /* Allocate target bits for frame based on frames numbers in GOP
301 weighted by global complexity estimates and B-frame scale factor
302 T = (Nx * Xx/Kx) / Sigma_j (Nj * Xj / Kj)
304 ratectl->min_q = ratectl->min_d = INT_MAX;
305 ratectl->max_q = ratectl->max_d = INT_MIN;
306 switch (picture->pict_type)
308 case I_TYPE:
310 /* There is little reason to rely on the *last* I-frame
311 as they're not closely related. The slow correction of
312 K should be enough to fine-tune...
315 ratectl->d = ratectl->d0i;
316 avg_K = ratectl->avg_KI;
317 Si = (ratectl->Xi + 3.0*avg_K*ratectl->actsum)/4.0;
318 ratectl->T = ratectl->R/(1.0+ratectl->Np*ratectl->Xp*ratectl->Ki/(Si*ratectl->Kp)+ratectl->Nb*ratectl->Xb*ratectl->Ki/(Si*ratectl->Kb));
320 break;
321 case P_TYPE:
322 ratectl->d = ratectl->d0pb;
323 avg_K = ratectl->avg_KP;
324 Sp = (ratectl->Xp + avg_K*ratectl->actsum) / 2.0;
325 ratectl->T = ratectl->R/(ratectl->Np+ratectl->Nb*ratectl->Kp*ratectl->Xb/(ratectl->Kb*Sp)) + 0.5;
326 break;
327 case B_TYPE:
328 ratectl->d = ratectl->d0pb; // I and P frame share ratectl virtual buffer
329 avg_K = ratectl->avg_KB;
330 Sb = ratectl->Xb /* + avg_K * ratectl->actsum) / 2.0 */;
331 ratectl->T = ratectl->R/(ratectl->Nb+ratectl->Np*ratectl->Kb*ratectl->Xp/(ratectl->Kp*Sb));
332 break;
335 /* Undershot bits have been "returned" via R */
336 if( ratectl->d < 0 )
337 ratectl->d = 0;
339 /* We don't let the target volume get absurdly low as it makes some
340 of the prediction maths ill-condtioned. At these levels quantisation
341 is always minimum anyway
343 if( ratectl->T < 4000.0 )
345 ratectl->T = 4000.0;
347 target_Q = scale_quant(picture,
348 avg_K * ratectl->avg_act *(mb_per_pict) / ratectl->T);
349 current_Q = scale_quant(picture,62.0*ratectl->d / ratectl->r);
350 #ifdef DEBUG
351 if( !quiet )
353 /* printf( "AA=%3.4f T=%6.0f K=%.1f ",avg_act, (double)T, avg_K ); */
354 printf( "AA=%3.4f SA==%3.4f ",avg_act, sum_avg_act );
356 #endif
358 if ( current_Q < 3 && target_Q > 12 )
360 /* We're undershooting and a serious surge in the data_flow
361 due to lagging adjustment is possible...
363 ratectl->d = (int) (target_Q * ratectl->r / 62.0);
366 ratectl->S = bitcount();
367 ratectl->frame_start = bitcount();
368 // ratectl->current_quant = ratectl->d * 62.0 / ratectl->r;
369 if(ratectl->current_quant < 1) ratectl->current_quant = 1;
370 if(ratectl->current_quant > 100) ratectl->current_quant = 100;
373 /* compute initial quantization stepsize (at the beginning of picture) */
374 int ratectl_start_mb(ratectl_t *ratectl, pict_data_s *picture)
376 double Qj;
377 int mquant;
379 if(fixed_mquant)
380 Qj = fixed_mquant;
381 else
382 Qj = ratectl->current_quant;
383 // Qj = ratectl->d * 62.0 / ratectl->r;
385 mquant = scale_quant( picture, Qj);
386 mquant = intmax(mquant, quant_floor);
388 return mquant;
391 void ratectl_update_pict(ratectl_t *ratectl, pict_data_s *picture)
393 double X;
394 double K;
395 int64_t AP,PP; /* Actual and padded picture bit counts */
396 int i;
397 int Qsum;
398 int frame_overshoot;
399 double avg_bitrate;
400 int last_size;
401 double new_weight;
402 double old_weight;
404 if(fixed_mquant) return;
406 AP = bitcount() - ratectl->S;
407 frame_overshoot = (int)AP-(int)ratectl->T;
409 /* For the virtual buffers for quantisation feedback it is the
410 actual under/overshoot that counts, not what's left after padding
412 ratectl->d += frame_overshoot;
414 /* If the cummulative undershoot is getting too large (as
415 a rough and ready heuristic we use 1/2 video buffer size)
416 we start padding the stream. Or, in the case of VBR,
417 we pretend we're padding but don't actually write anything!
421 if( ratectl->gop_undershoot-frame_overshoot > video_buffer_size/2 )
423 int padding_bytes =
424 ((ratectl->gop_undershoot - frame_overshoot) - video_buffer_size/2)/8;
425 if( quant_floor != 0 ) /* VBR case pretend to pad */
427 PP = AP + padding_bytes;
429 else
431 // printf( "PAD" );
432 // alignbits();
433 for( i = 0; i < padding_bytes/2; ++i )
435 // putbits(0, 16);
437 PP = bitcount() - ratectl->S; /* total # of bits in picture */
439 frame_overshoot = (int)PP - (int)ratectl->T;
441 else
442 PP = AP;
444 /* Estimate cummulative undershoot within this gop.
445 This is only an estimate because T includes an allocation
446 from earlier undershoots causing multiple counting.
447 Padding and an exact calculation each gop prevent the error
448 in the estimate growing too excessive...
450 ratectl->gop_undershoot -= frame_overshoot;
451 ratectl->gop_undershoot = ratectl->gop_undershoot > 0 ? ratectl->gop_undershoot : 0;
452 ratectl->R -= PP; /* remaining # of bits in GOP */
454 Qsum = 0;
455 for( i = 0; i < mb_per_pict; ++i )
457 Qsum += picture->mbinfo[i].mquant;
461 ratectl->AQ = (double)Qsum/(double)mb_per_pict;
462 /* TODO: The X are used as relative activity measures... so why
463 bother dividing by 2?
464 Note we have to be careful to measure the actual data not the
465 padding too!
467 ratectl->SQ += ratectl->AQ;
468 X = (double)AP*(ratectl->AQ/2.0);
470 K = X / ratectl->actsum;
471 #ifdef DEBUG
472 if( !quiet )
474 printf( "AQ=%.1f SQ=%.2f", AQ,SQ);
476 #endif
477 /* Bits that never got used in the past can't be resurrected
478 now... We use an average of past (positive) virtual buffer
479 fullness in the event of an under-shoot as otherwise we will
480 tend to over-shoot heavily when activity picks up.
482 TODO: We should really use our estimate K[IBP] of
483 bit_usage*activity / quantisation ratio to set a "sensible"
484 initial d to achieve a reasonable initial quantisation. Rather
485 than have to cut in a huge (lagging correction).
487 Alternatively, simply requantising with mean buffer if there is
488 a big buffer swing would work nicely...
492 /* EXPERIMENT: Xi are used as a guesstimate of likely *future* frame
493 activities based on the past. Thus we don't want anomalous outliers
494 due to scene changes swinging things too much. Introduce moving averages
495 for the Xi...
496 TODO: The averaging constants should be adjust to suit relative frame
497 frequencies...
499 switch (picture->pict_type)
501 case I_TYPE:
502 ratectl->avg_KI = (K + ratectl->avg_KI * K_AVG_WINDOW_I) / (K_AVG_WINDOW_I+1.0) ;
503 ratectl->d0i = ratectl->d;
504 ratectl->Xi = (X + 3.0 * ratectl->Xi) / 4.0;
505 break;
506 case P_TYPE:
507 ratectl->avg_KP = (K + ratectl->avg_KP * K_AVG_WINDOW_P) / (K_AVG_WINDOW_P+1.0) ;
508 ratectl->d0pb = ratectl->d;
509 ratectl->Xp = (X + ratectl->Xp * 12.0) / 13.0;
510 ratectl->Np--;
511 break;
512 case B_TYPE:
513 ratectl->avg_KB = (K + ratectl->avg_KB * K_AVG_WINDOW_B) / (K_AVG_WINDOW_B + 1.0) ;
514 ratectl->d0pb = ratectl->d;
515 ratectl->Xb = (X + ratectl->Xb * 24.0) / 25.0;
516 ratectl->Nb--;
517 break;
520 ratectl->frame_end = bitcount();
522 last_size = ratectl->frame_end - ratectl->frame_start;
523 avg_bitrate = (double)last_size * frame_rate;
524 switch(picture->pict_type)
526 case I_TYPE:
527 new_weight = avg_bitrate / bit_rate * 1 / N;
528 old_weight = (double)(N - 1) / N;
529 break;
531 default:
532 case P_TYPE:
533 new_weight = avg_bitrate / bit_rate * (N - 1) / N;
534 old_weight = (double)1 / N;
535 break;
537 ratectl->current_quant *= (old_weight + new_weight);
540 * printf("ratectl_update_pict %f %f\n",
541 * avg_bitrate,
542 * ratectl->current_quant);
547 /* Step 2: measure virtual buffer - estimated buffer discrepancy */
548 int ratectl_calc_mquant(ratectl_t *ratectl, pict_data_s *picture, int j)
550 int mquant;
551 double dj, Qj, actj, N_actj;
553 // pthread_mutex_lock(&(ratectl->ratectl_lock));
554 /* A.Stevens 2000 : we measure how much *information* (total activity)
555 has been covered and aim to release bits in proportion. Indeed,
556 complex blocks get an disproprortionate boost of allocated bits.
557 To avoid visible "ringing" effects...
561 actj = picture->mbinfo[j].act;
562 /* Guesstimate a virtual buffer fullness based on
563 bits used vs. bits in proportion to activity encoded
567 dj = ((double)ratectl->d) +
568 ((double)(bitcount() - ratectl->S) - ratectl->actcovered * ((double)ratectl->T) / ratectl->actsum);
572 /* scale against dynamic range of mquant and the bits/picture count.
573 quant_floor != 0.0 is the VBR case where we set a bitrate as a (high)
574 maximum and then put a floor on quantisation to achieve a reasonable
575 overall size.
576 Not that this *is* baseline quantisation. Not adjust for local activity.
577 Otherwise we end up blurring active macroblocks. Silly in a VBR context.
580 Qj = dj * 62.0 / ratectl->r;
582 //printf("ratectl_calc_mquant %f\n", Qj);
583 if(fixed_mquant)
584 Qj = fixed_mquant;
585 else
586 Qj = ratectl->current_quant;
588 Qj = (Qj > quant_floor) ? Qj : quant_floor;
589 /* Heuristic: Decrease quantisation for blocks with lots of
590 sizeable coefficients. We assume we just get a mess if
591 a complex texture's coefficients get chopped...
594 N_actj = actj < ratectl->avg_act ?
595 1.0 :
596 (actj + act_boost * ratectl->avg_act) /
597 (act_boost * actj + ratectl->avg_act);
599 mquant = scale_quant(picture, Qj * N_actj);
602 /* Update activity covered */
604 ratectl->actcovered += actj;
605 // pthread_mutex_unlock(&(ratectl->ratectl_lock));
607 return mquant;
610 /* VBV calculations
612 * generates warnings if underflow or overflow occurs
615 /* vbv_end_of_picture
617 * - has to be called directly after writing picture_data()
618 * - needed for accurate VBV buffer overflow calculation
619 * - assumes there is no byte stuffing prior to the next start code
622 void vbv_end_of_picture()
626 /* calc_vbv_delay
628 * has to be called directly after writing the picture start code, the
629 * reference point for vbv_delay
632 void calc_vbv_delay()
636 void stop_ratectl(ratectl_t *ratectl)
638 pthread_mutex_destroy(&(ratectl->ratectl_lock));