changeset 612:c0005de2be59 libavcodec

new ratecontrol code
author michaelni
date Sun, 25 Aug 2002 21:19:50 +0000
parents 3214d3f4519e
children 9bb356b23dd9
files Makefile dsputil.c dsputil.h eval.c motion_est.c mpegvideo.c mpegvideo.h ratecontrol.c
diffstat 8 files changed, 921 insertions(+), 284 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Sat Aug 24 13:24:43 2002 +0000
+++ b/Makefile	Sun Aug 25 21:19:50 2002 +0000
@@ -15,7 +15,7 @@
       mpegaudio.o ac3enc.o mjpeg.o resample.o dsputil.o \
       motion_est.o imgconvert.o imgresample.o msmpeg4.o \
       mpeg12.o h263dec.o svq1.o rv10.o mpegaudiodec.o pcm.o simple_idct.o \
-      ratecontrol.o adpcm.o
+      ratecontrol.o adpcm.o eval.o
 ASM_OBJS=
 
 # currently using liba52 for ac3 decoding
--- a/dsputil.c	Sat Aug 24 13:24:43 2002 +0000
+++ b/dsputil.c	Sun Aug 25 21:19:50 2002 +0000
@@ -32,6 +32,8 @@
 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 void (*clear_blocks)(DCTELEM *blocks);
+int (*pix_sum)(UINT8 * pix, int line_size);
+int (*pix_norm1)(UINT8 * pix, int line_size);
 
 op_pixels_abs_func pix_abs16x16;
 op_pixels_abs_func pix_abs16x16_x2;
@@ -159,6 +161,52 @@
     }
 }
 
+int pix_sum_c(UINT8 * pix, int line_size)
+{
+    int s, i, j;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+	for (j = 0; j < 16; j += 8) {
+	    s += pix[0];
+	    s += pix[1];
+	    s += pix[2];
+	    s += pix[3];
+	    s += pix[4];
+	    s += pix[5];
+	    s += pix[6];
+	    s += pix[7];
+	    pix += 8;
+	}
+	pix += line_size - 16;
+    }
+    return s;
+}
+
+int pix_norm1_c(UINT8 * pix, int line_size)
+{
+    int s, i, j;
+    UINT32 *sq = squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+	for (j = 0; j < 16; j += 8) {
+	    s += sq[pix[0]];
+	    s += sq[pix[1]];
+	    s += sq[pix[2]];
+	    s += sq[pix[3]];
+	    s += sq[pix[4]];
+	    s += sq[pix[5]];
+	    s += sq[pix[6]];
+	    s += sq[pix[7]];
+	    pix += 8;
+	}
+	pix += line_size - 16;
+    }
+    return s;
+}
+
+
 void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
 {
     int i;
@@ -241,7 +289,6 @@
         block += 8;
     }
 }
-
 #if 0
 
 #define PIXOP2(OPNAME, OP) \
@@ -569,7 +616,6 @@
 };
 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
 #endif
-
 #define op_put(a, b) a = b
 
 PIXOP2(avg, op_avg)
@@ -684,8 +730,11 @@
 
 #define op_avg(a, b) a = avg2(a, b)
 #define op_sub(a, b) a -= b
+#define op_put(a, b) a = b
 
 PIXOP(DCTELEM, sub, op_sub, 8)
+PIXOP(uint8_t, avg, op_avg, line_size)
+PIXOP(uint8_t, put, op_put, line_size)
 
 /* not rounding primitives */
 #undef avg2
@@ -693,6 +742,8 @@
 #define avg2(a,b) ((a+b)>>1)
 #define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
 
+PIXOP(uint8_t, avg_no_rnd, op_avg, line_size)
+PIXOP(uint8_t, put_no_rnd, op_put, line_size)
 /* motion estimation */
 
 #undef avg2
@@ -1261,6 +1312,8 @@
     add_pixels_clamped = add_pixels_clamped_c;
     gmc1= gmc1_c;
     clear_blocks= clear_blocks_c;
+    pix_sum= pix_sum_c;
+    pix_norm1= pix_norm1_c;
 
     pix_abs16x16     = pix_abs16x16_c;
     pix_abs16x16_x2  = pix_abs16x16_x2_c;
--- a/dsputil.h	Sat Aug 24 13:24:43 2002 +0000
+++ b/dsputil.h	Sun Aug 25 21:19:50 2002 +0000
@@ -62,6 +62,9 @@
 extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 extern void (*clear_blocks)(DCTELEM *blocks);
+extern int (*pix_sum)(UINT8 * pix, int line_size);
+extern int (*pix_norm1)(UINT8 * pix, int line_size);
+
 
 
 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eval.c	Sun Aug 25 21:19:50 2002 +0000
@@ -0,0 +1,241 @@
+/*
+ * simple arithmetic expression evaluator
+ *
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+ /*
+ * see http://joe.hotchkiss.com/programming/eval/eval.html
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#define STACK_SIZE 100
+
+typedef struct Parser{
+    double stack[STACK_SIZE];
+    int stack_index;
+    char *s;
+    double *const_value;
+    char **const_name;          // NULL terminated
+    double (**func1)(void *, double a); // NULL terminated
+    char **func1_name;          // NULL terminated
+    double (**func2)(void *, double a, double b); // NULL terminated
+    char **func2_name;          // NULL terminated
+    void *opaque;
+} Parser;
+
+static void evalExpression(Parser *p);
+
+static void push(Parser *p, double d){
+    if(p->stack_index+1>= STACK_SIZE){
+        fprintf(stderr, "stack overflow in the parser\n");
+        return;
+    }
+    p->stack[ p->stack_index++ ]= d;
+//printf("push %f\n", d); fflush(stdout);
+}
+
+static double pop(Parser *p){
+    if(p->stack_index<=0){
+        fprintf(stderr, "stack underflow in the parser\n");
+        return NAN;
+    }
+//printf("pop\n"); fflush(stdout);
+    return p->stack[ --p->stack_index ];
+}
+
+static int strmatch(char *s, char *prefix){
+    int i;
+    for(i=0; prefix[i]; i++){
+        if(prefix[i] != s[i]) return 0;
+    }
+    return 1;
+}
+
+static void evalPrimary(Parser *p){
+    double d, d2=NAN;
+    char *next= p->s;
+    int i;
+
+    /* number */
+    d= strtod(p->s, &next);
+    if(next != p->s){
+        push(p, d);
+        p->s= next;
+        return;
+    }
+    
+    /* named constants */
+    for(i=0; p->const_name[i]; i++){
+        if(strmatch(p->s, p->const_name[i])){
+            push(p, p->const_value[i]);
+            p->s+= strlen(p->const_name[i]);
+            return;
+        }
+    }
+    
+    p->s= strchr(p->s, '(');
+    if(p->s==NULL){
+        fprintf(stderr, "Parser: missing ( in \"%s\"\n", next);
+        return;
+    }
+    p->s++; // "("
+    evalExpression(p);
+    d= pop(p);
+    p->s++; // ")" or ","
+    if(p->s[-1]== ','){
+        evalExpression(p);
+        d2= pop(p);
+        p->s++; // ")"
+    }
+    
+         if( strmatch(next, "sinh"  ) ) d= sinh(d);
+    else if( strmatch(next, "cosh"  ) ) d= cosh(d);
+    else if( strmatch(next, "tanh"  ) ) d= tanh(d);
+    else if( strmatch(next, "sin"   ) ) d= sin(d);
+    else if( strmatch(next, "cos"   ) ) d= cos(d);
+    else if( strmatch(next, "tan"   ) ) d= tan(d);
+    else if( strmatch(next, "exp"   ) ) d= exp(d);
+    else if( strmatch(next, "log"   ) ) d= log(d);
+    else if( strmatch(next, "squish") ) d= 1/(1+exp(4*d));
+    else if( strmatch(next, "gauss" ) ) d= exp(-d*d/2)/sqrt(2*M_PI);
+    else if( strmatch(next, "abs"   ) ) d= abs(d);
+    else if( strmatch(next, "max"   ) ) d= d > d2 ? d : d2;
+    else if( strmatch(next, "min"   ) ) d= d < d2 ? d : d2;
+    else if( strmatch(next, "gt"    ) ) d= d > d2 ? 1.0 : 0.0;
+    else if( strmatch(next, "lt"    ) ) d= d > d2 ? 0.0 : 1.0;
+    else if( strmatch(next, "eq"    ) ) d= d == d2 ? 1.0 : 0.0;
+//    else if( strmatch(next, "l1"    ) ) d= 1 + d2*(d - 1);
+//    else if( strmatch(next, "sq01"  ) ) d= (d >= 0.0 && d <=1.0) ? 1.0 : 0.0;
+    else{
+        int error=1;
+        for(i=0; p->func1_name && p->func1_name[i]; i++){
+            if(strmatch(next, p->func1_name[i])){
+                d= p->func1[i](p->opaque, d);
+                error=0;
+                break;
+            }
+        }
+
+        for(i=0; p->func2_name && p->func2_name[i]; i++){
+            if(strmatch(next, p->func2_name[i])){
+                d= p->func2[i](p->opaque, d, d2);
+                error=0;
+                break;
+            }
+        }
+
+        if(error){
+            fprintf(stderr, "Parser: unknown function in \"%s\"\n", next);
+            return;
+        }
+    }
+    
+    if(p->s[-1]!= ')'){
+        fprintf(stderr, "Parser: missing ) in \"%s\"\n", next);
+        return;
+    }
+    push(p, d);
+}      
+       
+static void evalPow(Parser *p){
+    int neg= 0;
+    if(p->s[0]=='+') p->s++;
+       
+    if(p->s[0]=='-'){ 
+        neg= 1;
+        p->s++;
+    }
+    
+    if(p->s[0]=='('){
+        p->s++;;
+        evalExpression(p);
+
+        if(p->s[0]!=')')
+            fprintf(stderr, "Parser: missing )\n");
+        p->s++;
+    }else{
+        evalPrimary(p);
+    }
+    
+    if(neg) push(p, -pop(p));
+}
+
+static void evalFactor(Parser *p){
+    evalPow(p);
+    while(p->s[0]=='^'){
+        double d;
+
+        p->s++;
+        evalPow(p);
+        d= pop(p);
+        push(p, pow(pop(p), d));
+    }
+}
+
+static void evalTerm(Parser *p){
+    evalFactor(p);
+    while(p->s[0]=='*' || p->s[0]=='/'){
+        int inv= p->s[0]=='/';
+        double d;
+
+        p->s++;
+        evalFactor(p);
+        d= pop(p);
+        if(inv) d= 1.0/d;
+        push(p, d * pop(p));
+    }
+}
+
+static void evalExpression(Parser *p){
+    evalTerm(p);
+    while(p->s[0]=='+' || p->s[0]=='-'){
+        int sign= p->s[0]=='-';
+        double d;
+
+        p->s++;
+        evalTerm(p);
+        d= pop(p);
+        if(sign) d= -d;
+        push(p, d + pop(p));
+    }
+}
+
+double ff_eval(char *s, double *const_value, char **const_name,
+               double (**func1)(void *, double), char **func1_name, 
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque){
+    Parser p;
+    
+    p.stack_index=0;
+    p.s= s;
+    p.const_value= const_value;
+    p.const_name = const_name;
+    p.func1      = func1;
+    p.func1_name = func1_name;
+    p.func2      = func2;
+    p.func2_name = func2_name;
+    p.opaque     = opaque;
+    
+    evalExpression(&p);
+    return pop(&p);
+}
--- a/motion_est.c	Sat Aug 24 13:24:43 2002 +0000
+++ b/motion_est.c	Sun Aug 25 21:19:50 2002 +0000
@@ -41,28 +41,6 @@
 #define P_MV1 P[9]
 
 
-static int pix_sum(UINT8 * pix, int line_size)
-{
-    int s, i, j;
-
-    s = 0;
-    for (i = 0; i < 16; i++) {
-	for (j = 0; j < 16; j += 8) {
-	    s += pix[0];
-	    s += pix[1];
-	    s += pix[2];
-	    s += pix[3];
-	    s += pix[4];
-	    s += pix[5];
-	    s += pix[6];
-	    s += pix[7];
-	    pix += 8;
-	}
-	pix += line_size - 16;
-    }
-    return s;
-}
-
 static int pix_dev(UINT8 * pix, int line_size, int mean)
 {
     int s, i, j;
@@ -85,29 +63,6 @@
     return s;
 }
 
-static int pix_norm1(UINT8 * pix, int line_size)
-{
-    int s, i, j;
-    UINT32 *sq = squareTbl + 256;
-
-    s = 0;
-    for (i = 0; i < 16; i++) {
-	for (j = 0; j < 16; j += 8) {
-	    s += sq[pix[0]];
-	    s += sq[pix[1]];
-	    s += sq[pix[2]];
-	    s += sq[pix[3]];
-	    s += sq[pix[4]];
-	    s += sq[pix[5]];
-	    s += sq[pix[6]];
-	    s += sq[pix[7]];
-	    pix += 8;
-	}
-	pix += line_size - 16;
-    }
-    return s;
-}
-
 static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size)
 {
     int s, i, j;
@@ -1578,9 +1533,7 @@
 
     fbmin= bidir_refine(s, mb_x, mb_y);
 
-    if(s->flags&CODEC_FLAG_HQ){
-        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT;
-    }else{
+    {
         int score= dmin;
         type=MB_TYPE_DIRECT;
         
@@ -1596,9 +1549,15 @@
             score=fbmin;
             type= MB_TYPE_BIDIR;
         }
+        score= (score*score)>>8;
         s->mc_mb_var_sum += score;
-        s->mc_mb_var[mb_y*s->mb_width + mb_x] = score;
+        s->mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD
     }
+
+    if(s->flags&CODEC_FLAG_HQ){
+        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
+    }
+
 /*
 {
 static int count=0;
--- a/mpegvideo.c	Sat Aug 24 13:24:43 2002 +0000
+++ b/mpegvideo.c	Sun Aug 25 21:19:50 2002 +0000
@@ -226,6 +226,8 @@
             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
         }
+        
+        CHECKED_ALLOCZ(s->avctx->stats_out, 256);
     }
     
     if (s->out_format == FMT_H263 || s->encoding) {
@@ -328,7 +330,8 @@
     av_freep(&s->pb2_buffer);
     av_freep(&s->edge_emu_buffer);
     av_freep(&s->non_b_mv4_table);
-
+    av_freep(&s->avctx->stats_out);
+    
     for(i=0;i<3;i++) {
         int j;
         if(!(s->flags&CODEC_FLAG_DR1)){
@@ -377,13 +380,10 @@
     s->max_qdiff= avctx->max_qdiff;
     s->qcompress= avctx->qcompress;
     s->qblur= avctx->qblur;
-    s->b_quant_factor= avctx->b_quant_factor;
-    s->b_quant_offset= avctx->b_quant_offset;
     s->avctx = avctx;
     s->aspect_ratio_info= avctx->aspect_ratio_info;
     s->flags= avctx->flags;
     s->max_b_frames= avctx->max_b_frames;
-    s->rc_strategy= avctx->rc_strategy;
     s->b_frame_strategy= avctx->b_frame_strategy;
     s->codec_id= avctx->codec->id;
     s->luma_elim_threshold  = avctx->luma_elim_threshold;
@@ -678,7 +678,6 @@
                 avctx->dr_opaque_frame= s->next_dr_opaque;
         }
     }
-
     /* set dequantizer, we cant do it during init as it might change for mpeg4
        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
     if(s->out_format == FMT_H263){
@@ -703,10 +702,9 @@
     }
     emms_c();
     
+    s->last_pict_type    = s->pict_type;
     if(s->pict_type!=B_TYPE){
         s->last_non_b_pict_type= s->pict_type;
-        s->last_non_b_qscale= s->qscale;
-        s->last_non_b_mc_mb_var= s->mc_mb_var_sum;
         s->num_available_buffers++;
         if(s->num_available_buffers>2) s->num_available_buffers= 2;
     }
@@ -873,13 +871,21 @@
 
     flush_put_bits(&s->pb);
     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
-    if(s->pict_type==B_TYPE) s->pb_frame_bits+= s->frame_bits;
-    else                     s->pb_frame_bits= s->frame_bits;
-
+    
     s->total_bits += s->frame_bits;
     avctx->frame_bits  = s->frame_bits;
 //printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n", 
 //s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits);
+#if 0 //dump some stats to stats.txt for testing/debuging
+if(s->max_b_frames==0)
+{
+    static FILE *f=NULL;
+    if(!f) f= fopen("stats.txt", "wb");
+    get_psnr(pict->data, s->current_picture,
+             pict->linesize, s->linesize, avctx);
+    fprintf(f, "%7d, %7d, %2.4f\n", pbBufPtr(&s->pb) - s->pb.buf, s->qscale, avctx->psnr_y);
+}
+#endif
 
     if (avctx->get_psnr) {
         /* At this point pict->data should have the original frame   */
@@ -2010,6 +2016,25 @@
         memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
         memset(s->p_mv_table   , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
+        
+        if(!s->fixed_qscale){
+            /* finding spatial complexity for I-frame rate control */
+            for(mb_y=0; mb_y < s->mb_height; mb_y++) {
+                for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+                    int xx = mb_x * 16;
+                    int yy = mb_y * 16;
+                    uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx;
+                    int varc;
+                    int sum = pix_sum(pix, s->linesize);
+    
+                    sum= (sum+8)>>4;
+                    varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8;
+
+                    s->mb_var[s->mb_width * mb_y + mb_x] = varc;
+                    s->mb_var_sum    += varc;
+                }
+            }
+        }
     }
     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
         s->pict_type= I_TYPE;
@@ -2018,7 +2043,7 @@
             s->input_pict_type= I_TYPE;
             s->input_picture_in_gop_number=0;
         }
-//printf("Scene change detected, encoding as I Frame\n");
+//printf("Scene change detected, encoding as I Frame %d %d\n", s->mb_var_sum, s->mc_mb_var_sum);
     }
     
     if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) 
@@ -2037,9 +2062,7 @@
 //printf("f_code %d ///\n", s->f_code);
 
 //    printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
-    if(s->flags&CODEC_FLAG_PASS2)
-        s->qscale = ff_rate_estimate_qscale_pass2(s);
-    else if (!s->fixed_qscale) 
+    if (!s->fixed_qscale) 
         s->qscale = ff_rate_estimate_qscale(s);
 
     if (s->out_format == FMT_MJPEG) {
@@ -2675,7 +2698,7 @@
     int i, intra_count=0, inter_count=0;
     int intra_conceal= s->msmpeg4_version ? 50 : 50; //FIXME finetune
     int inter_conceal= s->msmpeg4_version ? 50 : 50;
-    
+
     // for last block
     if(mb_x>=s->mb_width)  mb_x= s->mb_width -1;
     if(mb_y>=s->mb_height) mb_y= s->mb_height-1;
--- a/mpegvideo.h	Sat Aug 24 13:24:43 2002 +0000
+++ b/mpegvideo.h	Sun Aug 25 21:19:50 2002 +0000
@@ -61,12 +61,34 @@
     UINT64 expected_bits;
     int new_pict_type;
     float new_qscale;
+    int mc_mb_var_sum;
+    int mb_var_sum;
+    int i_count;
+    int f_code;
+    int b_code;
 }RateControlEntry;
 
 typedef struct RateControlContext{
     FILE *stats_file;
-    int num_entries;
+    int num_entries;          /* number of RateControlEntries */
     RateControlEntry *entry;
+    int buffer_index;         /* amount of bits in the video/audio buffer */
+    Predictor pred[5];
+    double short_term_qsum;   /* sum of recent qscales */
+    double short_term_qcount; /* count of recent qscales */
+    double pass1_bits;        /* bits outputted by the pass1 code (including complexity init) */
+    double pass1_wanted_bits; /* bits which should have been outputed by the pass1 code (including complexity init) */
+    double last_qscale;
+    double last_qscale_for[5]; /* last qscale for a specific pict type */
+    double next_non_b_qscale;
+    double next_p_qscale;
+    int last_mc_mb_var_sum;
+    int last_mb_var_sum;
+    UINT64 i_cplx_sum[5];
+    UINT64 p_cplx_sum[5];
+    UINT64 mv_bits_sum[5];
+    UINT64 qscale_sum[5];
+    int frame_count[5];
 }RateControlContext;
 
 typedef struct ReorderBuffer{
@@ -107,9 +129,6 @@
     int flags;        /* AVCodecContext.flags (HQ, MV4, ...) */
     int force_input_type;/* 0= no force, otherwise I_TYPE, P_TYPE, ... */
     int max_b_frames; /* max number of b-frames for encoding */
-    float b_quant_factor;/* qscale factor between ips and b frames */
-    float b_quant_offset;/* qscale offset between ips and b frames */
-    int rc_strategy;
     int b_frame_strategy;
     int luma_elim_threshold;
     int chroma_elim_threshold;
@@ -170,8 +189,8 @@
     int input_pict_type;        /* pict_type prior to reordering of frames */
     int force_type;             /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
     int qscale;                 /* QP */
-    int last_non_b_qscale;	/* QP of last non b frame used for b frame qscale*/
     int pict_type;              /* I_TYPE, P_TYPE, B_TYPE, ... */
+    int last_pict_type;
     int last_non_b_pict_type;   /* used for mpeg4 gmc b-frames & ratecontrol */
     int frame_rate_index;
     /* motion compensation */
@@ -271,18 +290,10 @@
     int I_frame_bits; //FIXME used in mpeg12 ...
     int mb_var_sum;          /* sum of MB variance for current frame */
     int mc_mb_var_sum;       /* motion compensated MB variance for current frame */
-    int last_non_b_mc_mb_var;/* motion compensated MB variance for last non b frame */
     INT64 wanted_bits;
     INT64 total_bits;
     int frame_bits;        /* bits used for the current frame */
-    int pb_frame_bits;     /* bits of the last b...bp group */
-    Predictor i_pred;
-    Predictor p_pred;
-    double qsum;         /* sum of qscales */
-    double qcount;       /* count of qscales */
-    double short_term_qsum;   /* sum of recent qscales */
-    double short_term_qcount; /* count of recent qscales */
-    RateControlContext rc_context;
+    RateControlContext rc_context; // contains stuff only accessed in ratecontrol.c
 
     /* statistics, used for 2-pass encoding */
     int mv_bits;
@@ -590,5 +601,10 @@
 int ff_rate_estimate_qscale_pass2(MpegEncContext *s);
 void ff_write_pass1_stats(MpegEncContext *s);
 void ff_rate_control_uninit(MpegEncContext *s);
+double ff_eval(char *s, double *const_value, char **const_name,
+               double (**func1)(void *, double), char **func1_name, 
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque);
+
 
 #endif /* AVCODEC_MPEGVIDEO_H */
--- a/ratecontrol.c	Sat Aug 24 13:24:43 2002 +0000
+++ b/ratecontrol.c	Sun Aug 25 21:19:50 2002 +0000
@@ -17,84 +17,154 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+#include <math.h>
+#include "common.h"
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
 
-#define STATS_FILE "lavc_stats.txt"
+#undef NDEBUG // allways check asserts, the speed effect is far too small to disable them
+#include <assert.h>
 
 static int init_pass2(MpegEncContext *s);
+static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num);
 
 void ff_write_pass1_stats(MpegEncContext *s){
-    RateControlContext *rcc= &s->rc_context;
-//    fprintf(c->stats_file, "type:%d q:%d icount:%d pcount:%d scount:%d itex:%d ptex%d mv:%d misc:%d fcode:%d bcode:%d\")
-    fprintf(rcc->stats_file, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d\n",
+    sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n",
             s->picture_number, s->input_picture_number - s->max_b_frames, s->pict_type, 
-            s->qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, s->f_code, s->b_code);
+            s->qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, 
+            s->f_code, s->b_code, s->mc_mb_var_sum, s->mb_var_sum, s->i_count);
 }
 
 int ff_rate_control_init(MpegEncContext *s)
 {
     RateControlContext *rcc= &s->rc_context;
+    int i;
     emms_c();
 
-    if(s->flags&CODEC_FLAG_PASS1){
-        rcc->stats_file= fopen(STATS_FILE, "w");
-        if(!rcc->stats_file){
-            fprintf(stderr, "failed to open " STATS_FILE "\n");
-            return -1;
-        }
-    } else if(s->flags&CODEC_FLAG_PASS2){
-        int size;
+    for(i=0; i<5; i++){
+        rcc->pred[i].coeff= 7.0;
+        rcc->pred[i].count= 1.0;
+    
+        rcc->pred[i].decay= 0.4;
+        rcc->i_cplx_sum [i]=
+        rcc->p_cplx_sum [i]=
+        rcc->mv_bits_sum[i]=
+        rcc->qscale_sum [i]=
+        rcc->frame_count[i]= 1; // 1 is better cuz of 1/0 and such
+        rcc->last_qscale_for[i]=5;
+    }
+    rcc->buffer_index= s->avctx->rc_buffer_size/2;
+
+    rcc->next_non_b_qscale=10;
+    rcc->next_p_qscale=10;
+    
+    if(s->flags&CODEC_FLAG_PASS2){
         int i;
+        char *p;
 
-        rcc->stats_file= fopen(STATS_FILE, "r");
-        if(!rcc->stats_file){
-            fprintf(stderr, "failed to open " STATS_FILE "\n");
-            return -1;
+        /* find number of pics */
+        p= s->avctx->stats_in;
+        for(i=-1; p; i++){
+            p= strchr(p+1, ';');
         }
-
-        /* find number of pics without reading the file twice :) */
-        fseek(rcc->stats_file, 0, SEEK_END);
-        size= ftell(rcc->stats_file);
-        fseek(rcc->stats_file, 0, SEEK_SET);
-
-        size/= 64; // we need at least 64 byte to store a line ...
-        rcc->entry = (RateControlEntry*)av_mallocz(size*sizeof(RateControlEntry));
-
-        for(i=0; !feof(rcc->stats_file); i++){
+        i+= s->max_b_frames;
+        rcc->entry = (RateControlEntry*)av_mallocz(i*sizeof(RateControlEntry));
+        rcc->num_entries= i;
+        
+        /* init all to skiped p frames (with b frames we might have a not encoded frame at the end FIXME) */
+        for(i=0; i<rcc->num_entries; i++){
+            RateControlEntry *rce= &rcc->entry[i];
+            rce->pict_type= rce->new_pict_type=P_TYPE;
+            rce->qscale= rce->new_qscale=2;
+            rce->misc_bits= s->mb_num + 10;
+            rce->mb_var_sum= s->mb_num*100;
+        }        
+        
+        /* read stats */
+        p= s->avctx->stats_in;
+        for(i=0; i<rcc->num_entries - s->max_b_frames; i++){
             RateControlEntry *rce;
             int picture_number;
             int e;
-            
-            e= fscanf(rcc->stats_file, "in:%d ", &picture_number);
+            char *next;
+
+            next= strchr(p, ';');
+            if(next){
+                (*next)=0; //sscanf in unbelieavle slow on looong strings //FIXME copy / dont write
+                next++;
+            }
+            e= sscanf(p, " in:%d ", &picture_number);
+
+            assert(picture_number >= 0);
+            assert(picture_number < rcc->num_entries);
             rce= &rcc->entry[picture_number];
-            e+=fscanf(rcc->stats_file, "out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%*d bcode:%*d\n",
-                   &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits);
-            if(e!=7){
-                fprintf(stderr, STATS_FILE " is damaged\n");
+
+            e+=sscanf(p, " in:%*d out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d",
+                   &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits, 
+                   &rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count);
+            if(e!=12){
+                fprintf(stderr, "statistics are damaged at line %d, parser out=%d\n", i, e);
                 return -1;
             }
+            p= next;
         }
-        rcc->num_entries= i;
         
         if(init_pass2(s) < 0) return -1;
     }
      
-    /* no 2pass stuff, just normal 1-pass */
-    //initial values, they dont really matter as they will be totally different within a few frames
-    s->i_pred.coeff= s->p_pred.coeff= 7.0;
-    s->i_pred.count= s->p_pred.count= 1.0;
-    
-    s->i_pred.decay= s->p_pred.decay= 0.4;
+    if(!(s->flags&CODEC_FLAG_PASS2)){
+
+        rcc->short_term_qsum=0.001;
+        rcc->short_term_qcount=0.001;
     
-    // use more bits at the beginning, otherwise high motion at the begin will look like shit
-    s->qsum=100 * s->qmin;
-    s->qcount=100;
+        rcc->pass1_bits       =0.001;
+        rcc->pass1_wanted_bits=0.001;
+        
+        /* init stuff with the user specified complexity */
+        if(s->avctx->rc_initial_cplx){
+            for(i=0; i<60*30; i++){
+                double bits= s->avctx->rc_initial_cplx * (i/10000.0 + 1.0)*s->mb_num;
+                RateControlEntry rce;
+                double q;
+                
+                if     (i%((s->gop_size+3)/4)==0) rce.pict_type= I_TYPE;
+                else if(i%(s->max_b_frames+1))    rce.pict_type= B_TYPE;
+                else                              rce.pict_type= P_TYPE;
+
+                rce.new_pict_type= rce.pict_type;
+                rce.mc_mb_var_sum= bits*s->mb_num/100000;
+                rce.mb_var_sum   = s->mb_num;
+                rce.qscale   = 2;
+                rce.f_code   = 2;
+                rce.b_code   = 1;
+                rce.misc_bits= 1;
 
-    s->short_term_qsum=0.001;
-    s->short_term_qcount=0.001;
+                if(s->pict_type== I_TYPE){
+                    rce.i_count   = s->mb_num;
+                    rce.i_tex_bits= bits;
+                    rce.p_tex_bits= 0;
+                    rce.mv_bits= 0;
+                }else{
+                    rce.i_count   = 0; //FIXME we do know this approx
+                    rce.i_tex_bits= 0;
+                    rce.p_tex_bits= bits*0.9;
+                    rce.mv_bits= bits*0.1;
+                }
+                rcc->i_cplx_sum [rce.pict_type] += rce.i_tex_bits*rce.qscale;
+                rcc->p_cplx_sum [rce.pict_type] += rce.p_tex_bits*rce.qscale;
+                rcc->mv_bits_sum[rce.pict_type] += rce.mv_bits;
+                rcc->frame_count[rce.pict_type] ++;
 
+                bits= rce.i_tex_bits + rce.p_tex_bits;
+
+                q= get_qscale(s, &rce, rcc->pass1_wanted_bits/rcc->pass1_bits, i);
+                rcc->pass1_wanted_bits+= s->bit_rate/(s->frame_rate / (double)FRAME_RATE_BASE);
+            }
+        }
+
+    }
+    
     return 0;
 }
 
@@ -103,24 +173,257 @@
     RateControlContext *rcc= &s->rc_context;
     emms_c();
 
-    if(rcc->stats_file) 
-        fclose(rcc->stats_file);
-    rcc->stats_file = NULL;
     av_freep(&rcc->entry);
 }
 
+static inline double qp2bits(RateControlEntry *rce, double qp){
+    if(qp<=0.0){
+        fprintf(stderr, "qp<=0.0\n");
+    }
+    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits+1)/ qp;
+}
+
+static inline double bits2qp(RateControlEntry *rce, double bits){
+    if(bits<0.9){
+        fprintf(stderr, "bits<0.9\n");
+    }
+    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits+1)/ bits;
+}
+    
+static void update_rc_buffer(MpegEncContext *s, int frame_size){
+    RateControlContext *rcc= &s->rc_context;
+    const double fps= (double)s->frame_rate / FRAME_RATE_BASE;
+    const double buffer_size= s->avctx->rc_buffer_size;
+    const double min_rate= s->avctx->rc_min_rate/fps;
+    const double max_rate= s->avctx->rc_max_rate/fps;
+
+    if(buffer_size){
+        rcc->buffer_index-= frame_size;
+        if(rcc->buffer_index < buffer_size/2 /*FIXME /2 */ || min_rate==0){
+            rcc->buffer_index+= max_rate;
+            if(rcc->buffer_index >= buffer_size)
+                rcc->buffer_index= buffer_size-1;
+        }else{
+            rcc->buffer_index+= min_rate;
+        }
+        
+        if(rcc->buffer_index < 0)
+            fprintf(stderr, "rc buffer underflow\n");
+        if(rcc->buffer_index >= s->avctx->rc_buffer_size)
+            fprintf(stderr, "rc buffer overflow\n");
+    }
+}
+
+/**
+ * modifies the bitrate curve from pass1 for one frame
+ */
+static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num){
+    RateControlContext *rcc= &s->rc_context;
+    double q, bits;
+    const int pict_type= rce->new_pict_type;
+    const double mb_num= s->mb_num;  
+    int i;
+    const double last_q= rcc->last_qscale_for[pict_type];
+
+    double const_values[]={
+        M_PI,
+        M_E,
+        rce->i_tex_bits*rce->qscale,
+        rce->p_tex_bits*rce->qscale,
+        (rce->i_tex_bits + rce->p_tex_bits)*(double)rce->qscale,
+        rce->mv_bits/mb_num,
+        rce->pict_type == B_TYPE ? (rce->f_code + rce->b_code)*0.5 : rce->f_code,
+        rce->i_count/mb_num,
+        rce->mc_mb_var_sum/mb_num,
+        rce->mb_var_sum/mb_num,
+        rce->pict_type == I_TYPE,
+        rce->pict_type == P_TYPE,
+        rce->pict_type == B_TYPE,
+        rcc->qscale_sum[pict_type] / (double)rcc->frame_count[pict_type],
+        s->qcompress,
+/*        rcc->last_qscale_for[I_TYPE],
+        rcc->last_qscale_for[P_TYPE],
+        rcc->last_qscale_for[B_TYPE],
+        rcc->next_non_b_qscale,*/
+        rcc->i_cplx_sum[I_TYPE] / (double)rcc->frame_count[I_TYPE],
+        rcc->i_cplx_sum[P_TYPE] / (double)rcc->frame_count[P_TYPE],
+        rcc->p_cplx_sum[P_TYPE] / (double)rcc->frame_count[P_TYPE],
+        rcc->p_cplx_sum[B_TYPE] / (double)rcc->frame_count[B_TYPE],
+        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / (double)rcc->frame_count[pict_type],
+        0
+    };
+    char *const_names[]={
+        "PI",
+        "E",
+        "iTex",
+        "pTex",
+        "tex",
+        "mv",
+        "fCode",
+        "iCount",
+        "mcVar",
+        "var",
+        "isI",
+        "isP",
+        "isB",
+        "avgQP",
+        "qComp",
+/*        "lastIQP",
+        "lastPQP",
+        "lastBQP",
+        "nextNonBQP",*/
+        "avgIITex",
+        "avgPITex",
+        "avgPPTex",
+        "avgBPTex",
+        "avgTex",
+        NULL
+    };
+    double (*func1[])(void *, double)={
+        bits2qp,
+        qp2bits,
+        NULL
+    };
+    char *func1_names[]={
+        "bits2qp",
+        "qp2bits",
+        NULL
+    };
+
+    bits= ff_eval(s->avctx->rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
+    
+    rcc->pass1_bits+= bits;
+    bits*=rate_factor;
+    if(bits<0.0) bits=0.0;
+    bits+= 1.0; //avoid 1/0 issues
+    
+    /* user override */
+    for(i=0; i<s->avctx->rc_override_count; i++){
+        RcOverride *rco= s->avctx->rc_override;
+        if(rco[i].start_frame > frame_num) continue;
+        if(rco[i].end_frame   < frame_num) continue;
+    
+        if(rco[i].qscale) 
+            bits= qp2bits(rce, rco[i].qscale); //FIXME move at end to really force it?
+        else
+            bits*= rco[i].quality_factor;
+    }
+
+    q= bits2qp(rce, bits);
+    
+    /* I/B difference */
+    if     (pict_type==I_TYPE && s->avctx->i_quant_factor<0.0)
+        q= -q*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
+    else if(pict_type==B_TYPE && s->avctx->b_quant_factor<0.0)
+        q= -q*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
+    
+    /* last qscale / qdiff stuff */
+    if     (q > last_q + s->max_qdiff) q= last_q + s->max_qdiff;
+    else if(q < last_q - s->max_qdiff) q= last_q - s->max_qdiff;
+
+    rcc->last_qscale_for[pict_type]= q; //Note we cant do that after blurring
+    
+    return q;
+}
+
+/**
+ * gets the qmin & qmax for pict_type
+ */
+static void get_qminmax(int *qmin_ret, int *qmax_ret, MpegEncContext *s, int pict_type){
+    int qmin= s->qmin;                                                       
+    int qmax= s->qmax;
+
+    if(pict_type==B_TYPE){
+        qmin= (int)(qmin*ABS(s->avctx->b_quant_factor)+s->avctx->b_quant_offset + 0.5);
+        qmax= (int)(qmax*ABS(s->avctx->b_quant_factor)+s->avctx->b_quant_offset + 0.5);
+    }else if(pict_type==I_TYPE){
+        qmin= (int)(qmin*ABS(s->avctx->i_quant_factor)+s->avctx->i_quant_offset + 0.5);
+        qmax= (int)(qmax*ABS(s->avctx->i_quant_factor)+s->avctx->i_quant_offset + 0.5);
+    }
+
+    if(qmin<1) qmin=1;
+    if(qmin==1 && s->qmin>1) qmin=2; //avoid qmin=1 unless the user wants qmin=1
+
+    if(qmin<3 && s->max_qcoeff<=128 && pict_type==I_TYPE) qmin=3; //reduce cliping problems
+
+    if(qmax>31) qmax=31;
+    if(qmax<=qmin) qmax= qmin= (qmax+qmin+1)>>1;
+    
+    *qmin_ret= qmin;
+    *qmax_ret= qmax;
+}
+
+static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q, int frame_num){
+    RateControlContext *rcc= &s->rc_context;
+    int qmin, qmax;
+    double bits;
+    const int pict_type= rce->new_pict_type;
+    const double buffer_size= s->avctx->rc_buffer_size;
+    const double min_rate= s->avctx->rc_min_rate;
+    const double max_rate= s->avctx->rc_max_rate;
+    
+    get_qminmax(&qmin, &qmax, s, pict_type);
+
+    /* modulation */
+    if(s->avctx->rc_qmod_freq && frame_num%s->avctx->rc_qmod_freq==0 && pict_type==P_TYPE)
+        q*= s->avctx->rc_qmod_amp;
+
+    bits= qp2bits(rce, q);
+
+    /* buffer overflow/underflow protection */
+    if(buffer_size){
+        double expected_size= rcc->buffer_index - bits;
+
+        if(min_rate){
+            double d= 2*(buffer_size - (expected_size + min_rate))/buffer_size;
+            if(d>1.0) d=1.0;
+            q/= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
+        }
+
+        if(max_rate){
+            double d= 2*expected_size/buffer_size;
+            if(d>1.0) d=1.0;
+            q*= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
+        }
+    }
+
+    if(s->avctx->rc_qsquish==0.0){
+        if     (q<qmin) q=qmin;
+        else if(q>qmax) q=qmax;
+    }else{
+        double min2= log(qmin);
+        double max2= log(qmax);
+        
+        q= log(q);
+        q= (q - min2)/(max2-min2) - 0.5;
+        q*= -4.0;
+        q= 1.0/(1.0 + exp(q));
+        q= q*(max2-min2) + min2;
+        
+        q= exp(q);
+    }
+
+    return q;
+}
+
 //----------------------------------
 // 1 Pass Code
 
-static double predict(Predictor *p, double q, double var)
+static double predict_size(Predictor *p, double q, double var)
 {
      return p->coeff*var / (q*p->count);
 }
 
+static double predict_qp(Predictor *p, double size, double var)
+{
+//printf("coeff:%f, count:%f, var:%f, size:%f//\n", p->coeff, p->count, var, size);
+     return p->coeff*var / (size*p->count);
+}
+
 static void update_predictor(Predictor *p, double q, double var, double size)
 {
     double new_coeff= size*q / (var + 1);
-    if(var<1000) return;
+    if(var<10) return;
 
     p->count*= p->decay;
     p->coeff*= p->decay;
@@ -130,90 +433,136 @@
 
 int ff_rate_estimate_qscale(MpegEncContext *s)
 {
-    int qmin= s->qmin;
-    int qmax= s->qmax;
-    int rate_q=5;
     float q;
-    int qscale;
+    int qscale, qmin, qmax;
     float br_compensation;
     double diff;
     double short_term_q;
-    double long_term_q;
     double fps;
-    int picture_number= s->input_picture_number - s->max_b_frames;
+    int picture_number= s->picture_number;
     int64_t wanted_bits;
+    RateControlContext *rcc= &s->rc_context;
+    RateControlEntry local_rce, *rce;
+    double bits;
+    double rate_factor;
+    int var;
+    const int pict_type= s->pict_type;
     emms_c();
 
+    get_qminmax(&qmin, &qmax, s, pict_type);
+
     fps= (double)s->frame_rate / FRAME_RATE_BASE;
-    wanted_bits= (uint64_t)(s->bit_rate*(double)picture_number/fps);
-//    printf("%d %d %d\n", picture_number, (int)wanted_bits, (int)s->total_bits);
-    
-    if(s->pict_type==B_TYPE){
-        qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
-        qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
-    }
-    if(qmin<1) qmin=1;
-    if(qmax>31) qmax=31;
-    if(qmax<=qmin) qmax= qmin;
-
+//printf("input_picture_number:%d picture_number:%d\n", s->input_picture_number, s->picture_number);
         /* update predictors */
     if(picture_number>2){
-        if(s->pict_type!=B_TYPE && s->last_non_b_pict_type == P_TYPE){
-//printf("%d %d %d %f\n", s->qscale, s->last_mc_mb_var, s->frame_bits, s->p_pred.coeff);
-            update_predictor(&s->p_pred, s->last_non_b_qscale, s->last_non_b_mc_mb_var, s->pb_frame_bits);
-        }
+        const int last_var= s->last_pict_type == I_TYPE ? rcc->last_mb_var_sum : rcc->last_mc_mb_var_sum;
+        update_predictor(&rcc->pred[s->last_pict_type], rcc->last_qscale, sqrt(last_var), s->frame_bits);
     }
 
-    if(s->pict_type == I_TYPE){
-        short_term_q= s->short_term_qsum/s->short_term_qcount;
-    
-        long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
-
-        q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q);
-    }else if(s->pict_type==B_TYPE){
-        q= (int)(s->last_non_b_qscale*s->b_quant_factor+s->b_quant_offset + 0.5);
-    }else{ //P Frame
-        int i;
-        int diff, best_diff=1000000000;
-        for(i=1; i<=31; i++){
-            diff= predict(&s->p_pred, i, s->mc_mb_var_sum) - (double)s->bit_rate/fps;
-            if(diff<0) diff= -diff;
-            if(diff<best_diff){
-                best_diff= diff;
-                rate_q= i;
-            }
-        }
-        s->short_term_qsum*=s->qblur;
-        s->short_term_qcount*=s->qblur;
-
-        s->short_term_qsum+= rate_q;
-        s->short_term_qcount++;
-        short_term_q= s->short_term_qsum/s->short_term_qcount;
-    
-        long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
-
-//    q= (long_term_q - short_term_q)*s->qcompress + short_term_q;
-        q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q);
+    if(s->flags&CODEC_FLAG_PASS2){
+        assert(picture_number>=0);
+        assert(picture_number<rcc->num_entries);
+        rce= &rcc->entry[picture_number];
+        wanted_bits= rce->expected_bits;
+    }else{
+        rce= &local_rce;
+        wanted_bits= (uint64_t)(s->bit_rate*(double)picture_number/fps);
     }
 
     diff= s->total_bits - wanted_bits;
     br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance;
     if(br_compensation<=0.0) br_compensation=0.001;
-    q/=br_compensation;
+
+    var= pict_type == I_TYPE ? s->mb_var_sum : s->mc_mb_var_sum;
+    
+    if(s->flags&CODEC_FLAG_PASS2){
+        if(pict_type!=I_TYPE)
+            assert(pict_type == rce->new_pict_type);
+
+        q= rce->new_qscale / br_compensation;
+//printf("%f %f %f last:%d var:%d type:%d//\n", q, rce->new_qscale, br_compensation, s->frame_bits, var, pict_type);
+    }else{
+        rce->pict_type= 
+        rce->new_pict_type= pict_type;
+        rce->mc_mb_var_sum= s->mc_mb_var_sum;
+        rce->mb_var_sum   = s->   mb_var_sum;
+        rce->qscale   = 2;
+        rce->f_code   = s->f_code;
+        rce->b_code   = s->b_code;
+        rce->misc_bits= 1;
+
+        if(picture_number>0)
+            update_rc_buffer(s, s->frame_bits);
+
+        bits= predict_size(&rcc->pred[pict_type], rce->qscale, sqrt(var));
+        if(pict_type== I_TYPE){
+            rce->i_count   = s->mb_num;
+            rce->i_tex_bits= bits;
+            rce->p_tex_bits= 0;
+            rce->mv_bits= 0;
+        }else{
+            rce->i_count   = 0; //FIXME we do know this approx
+            rce->i_tex_bits= 0;
+            rce->p_tex_bits= bits*0.9;
+            
+            rce->mv_bits= bits*0.1;
+        }
+        rcc->i_cplx_sum [pict_type] += rce->i_tex_bits*rce->qscale;
+        rcc->p_cplx_sum [pict_type] += rce->p_tex_bits*rce->qscale;
+        rcc->mv_bits_sum[pict_type] += rce->mv_bits;
+        rcc->frame_count[pict_type] ++;
+
+        bits= rce->i_tex_bits + rce->p_tex_bits;
+        rate_factor= rcc->pass1_wanted_bits/rcc->pass1_bits * br_compensation;
+    
+        q= get_qscale(s, rce, rate_factor, picture_number);
+
+//printf("%f ", q);
+        if     (pict_type==I_TYPE && s->avctx->i_quant_factor>0.0)
+            q= rcc->next_p_qscale*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
+        else if(pict_type==B_TYPE && s->avctx->b_quant_factor>0.0)
+            q= rcc->next_non_b_qscale*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
+            
+//printf("%f ", q);
+        assert(q>0.0);
+
+        if(pict_type==P_TYPE || s->intra_only){ //FIXME type dependant blur like in 2-pass
+            rcc->short_term_qsum*=s->qblur;
+            rcc->short_term_qcount*=s->qblur;
+
+            rcc->short_term_qsum+= q;
+            rcc->short_term_qcount++;
+//printf("%f ", q);
+            q= short_term_q= rcc->short_term_qsum/rcc->short_term_qcount;
+//printf("%f ", q);
+        }
+        q= modify_qscale(s, rce, q, picture_number);
+
+        rcc->pass1_wanted_bits+= s->bit_rate/fps;
+
+        if(pict_type != B_TYPE) rcc->next_non_b_qscale= q;
+        if(pict_type == P_TYPE) rcc->next_p_qscale= q;
+    }
+//printf("qmin:%d, qmax:%d, q:%f\n", qmin, qmax, q);
+    
+
+    if     (q<qmin) q=qmin; 
+    else if(q>qmax) q=qmax;
+        
+//    printf("%f %d %d %d\n", q, picture_number, (int)wanted_bits, (int)s->total_bits);
+    
+
 //printf("%f %f %f\n", q, br_compensation, short_term_q);
     qscale= (int)(q + 0.5);
-    if     (qscale<qmin) qscale=qmin;
-    else if(qscale>qmax) qscale=qmax;
+//printf("%d ", qscale);
     
-    if(s->pict_type!=B_TYPE){
-        s->qsum+= qscale;
-        s->qcount++;
-        if     (qscale<s->last_non_b_qscale-s->max_qdiff) qscale=s->last_non_b_qscale-s->max_qdiff;
-        else if(qscale>s->last_non_b_qscale+s->max_qdiff) qscale=s->last_non_b_qscale+s->max_qdiff;
-    }
 //printf("q:%d diff:%d comp:%f rate_q:%d st_q:%f fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation, 
 //       rate_q, short_term_q, s->mc_mb_var, s->frame_bits);
 //printf("%d %d\n", s->bit_rate, (int)fps);
+
+    rcc->last_qscale= qscale;
+    rcc->last_mc_mb_var_sum= s->mc_mb_var_sum;
+    rcc->last_mb_var_sum= s->mb_var_sum;
     return qscale;
 }
 
@@ -231,10 +580,12 @@
     uint64_t available_bits[5];
     uint64_t all_const_bits;
     uint64_t all_available_bits= (uint64_t)(s->bit_rate*(double)rcc->num_entries/fps);
-    int num_frames[5]={0,0,0,0,0};
     double rate_factor=0;
     double step;
     int last_i_frame=-10000000;
+    const int filter_size= (int)(s->qblur*4) | 1;  
+    double expected_bits;
+    double *qscale, *blured_qscale;
 
     /* find complexity & const_bits & decide the pict_types */
     for(i=0; i<rcc->num_entries; i++){
@@ -272,10 +623,13 @@
                 break;
             }
         }
+        rcc->i_cplx_sum [rce->pict_type] += rce->i_tex_bits*rce->qscale;
+        rcc->p_cplx_sum [rce->pict_type] += rce->p_tex_bits*rce->qscale;
+        rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits;
+        rcc->frame_count[rce->pict_type] ++;
 
         complexity[rce->new_pict_type]+= (rce->i_tex_bits+ rce->p_tex_bits)*(double)rce->qscale;
         const_bits[rce->new_pict_type]+= rce->mv_bits + rce->misc_bits;
-        num_frames[rce->new_pict_type]++;
     }
     all_const_bits= const_bits[I_TYPE] + const_bits[P_TYPE] + const_bits[B_TYPE];
     
@@ -283,120 +637,108 @@
         fprintf(stderr, "requested bitrate is to low\n");
         return -1;
     }
-
-//    avg_complexity= complexity/rcc->num_entries;
-    avg_quantizer[P_TYPE]= 
-    avg_quantizer[I_TYPE]=   (complexity[I_TYPE]+complexity[P_TYPE] + complexity[B_TYPE]/s->b_quant_factor) 
-                           / (all_available_bits - all_const_bits);
-    avg_quantizer[B_TYPE]= avg_quantizer[P_TYPE]*s->b_quant_factor + s->b_quant_offset;
-//printf("avg quantizer: %f %f\n", avg_quantizer[P_TYPE], avg_quantizer[B_TYPE]);
+    
+    /* find average quantizers */
+    avg_quantizer[P_TYPE]=0;
+    for(step=256*256; step>0.0000001; step*=0.5){
+        double expected_bits=0;
+        avg_quantizer[P_TYPE]+= step;
+        
+        avg_quantizer[I_TYPE]= avg_quantizer[P_TYPE]*ABS(s->avctx->i_quant_factor) + s->avctx->i_quant_offset;
+        avg_quantizer[B_TYPE]= avg_quantizer[P_TYPE]*ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
+        
+        expected_bits= 
+            + all_const_bits 
+            + complexity[I_TYPE]/avg_quantizer[I_TYPE]
+            + complexity[P_TYPE]/avg_quantizer[P_TYPE]
+            + complexity[B_TYPE]/avg_quantizer[B_TYPE];
+            
+        if(expected_bits < all_available_bits) avg_quantizer[P_TYPE]-= step;
+//printf("%f %lld %f\n", expected_bits, all_available_bits, avg_quantizer[P_TYPE]);
+    }
+printf("qp_i:%f, qp_p:%f, qp_b:%f\n", avg_quantizer[I_TYPE],avg_quantizer[P_TYPE],avg_quantizer[B_TYPE]);
 
     for(i=0; i<5; i++){
         available_bits[i]= const_bits[i] + complexity[i]/avg_quantizer[i];
     }
 //printf("%lld %lld %lld %lld\n", available_bits[I_TYPE], available_bits[P_TYPE], available_bits[B_TYPE], all_available_bits);
-    
+        
+    qscale= malloc(sizeof(double)*rcc->num_entries);
+    blured_qscale= malloc(sizeof(double)*rcc->num_entries);
+
     for(step=256*256; step>0.0000001; step*=0.5){
-        uint64_t expected_bits=0;
+        expected_bits=0;
         rate_factor+= step;
+        
+        rcc->buffer_index= s->avctx->rc_buffer_size/2;
+
         /* find qscale */
         for(i=0; i<rcc->num_entries; i++){
-            RateControlEntry *rce= &rcc->entry[i];
-            double short_term_q, q, bits_left;
-            const int pict_type= rce->new_pict_type;
-            int qmin= s->qmin;
-            int qmax= s->qmax;
+            qscale[i]= get_qscale(s, &rcc->entry[i], rate_factor, i);
+        }
+        assert(filter_size%2==1);
 
-            if(pict_type==B_TYPE){
-                qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
-                qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
-            }
-            if(qmin<1) qmin=1;
-            if(qmax>31) qmax=31;
-            if(qmax<=qmin) qmax= qmin;
-            
-            switch(s->rc_strategy){
-            case 0:
-                bits_left= available_bits[pict_type]/num_frames[pict_type]*rate_factor - rce->misc_bits - rce->mv_bits;
-                if(bits_left<1.0) bits_left=1.0;
-                short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits)/bits_left;
-                break;
-            case 1:
-                bits_left= (available_bits[pict_type] - const_bits[pict_type])/num_frames[pict_type]*rate_factor;
-                if(bits_left<1.0) bits_left=1.0;
-                short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits)/bits_left;
-                break;
-            case 2:
-                bits_left= available_bits[pict_type]/num_frames[pict_type]*rate_factor;
-                if(bits_left<1.0) bits_left=1.0;
-                short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits + rce->misc_bits + rce->mv_bits)/bits_left;
-                break;
-            default:
-                fprintf(stderr, "unknown strategy\n");
-                short_term_q=3; //gcc warning fix
-            }
+        /* fixed I/B QP relative to P mode */
+        rcc->next_non_b_qscale= 10;
+        rcc->next_p_qscale= 10;
+        for(i=rcc->num_entries-1; i>=0; i--){
+            RateControlEntry *rce= &rcc->entry[i];
+            const int pict_type= rce->new_pict_type;
+        
+            if     (pict_type==I_TYPE && s->avctx->i_quant_factor>0.0)
+                qscale[i]= rcc->next_p_qscale*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
+            else if(pict_type==B_TYPE && s->avctx->b_quant_factor>0.0)
+                qscale[i]= rcc->next_non_b_qscale*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
 
-            if(short_term_q>31.0) short_term_q=31.0;
-            else if (short_term_q<1.0) short_term_q=1.0;
-
-            q= 1/((1/avg_quantizer[pict_type] - 1/short_term_q)*s->qcompress + 1/short_term_q);
-            if     (q<qmin) q=qmin;
-            else if(q>qmax) q=qmax;
-//printf("lq:%f, sq:%f t:%f q:%f\n", avg_quantizer[rce->pict_type], short_term_q, bits_left, q);
-            rce->new_qscale= q;
+            if(pict_type!=B_TYPE) 
+                rcc->next_non_b_qscale= qscale[i];
+            if(pict_type==P_TYPE) 
+                rcc->next_p_qscale= qscale[i];
         }
 
         /* smooth curve */
+        for(i=0; i<rcc->num_entries; i++){
+            RateControlEntry *rce= &rcc->entry[i];
+            const int pict_type= rce->new_pict_type;
+            int j;
+            double q=0.0, sum=0.0;
+        
+            for(j=0; j<filter_size; j++){
+                int index= i+j-filter_size/2;
+                double d= index-i;
+                double coeff= s->qblur==0 ? 1.0 : exp(-d*d/(s->qblur * s->qblur));
+            
+                if(index < 0 || index >= rcc->num_entries) continue;
+                if(pict_type != rcc->entry[index].new_pict_type) continue;
+                q+= qscale[index] * coeff;
+                sum+= coeff;
+            }
+            blured_qscale[i]= q/sum;
+        }
     
         /* find expected bits */
         for(i=0; i<rcc->num_entries; i++){
             RateControlEntry *rce= &rcc->entry[i];
-            double factor= rce->qscale / rce->new_qscale;
-            
+            double bits;
+            rce->new_qscale= modify_qscale(s, rce, blured_qscale[i], i);
+            bits= qp2bits(rce, rce->new_qscale);
+//printf("%d %f\n", rce->new_bits, blured_qscale[i]);
+            update_rc_buffer(s, bits);
+
             rce->expected_bits= expected_bits;
-            expected_bits += (int)(rce->misc_bits + rce->mv_bits + (rce->i_tex_bits + rce->p_tex_bits)*factor + 0.5);
+            expected_bits += bits;
         }
 
-//        printf("%d %d %f\n", (int)expected_bits, (int)all_available_bits, rate_factor);
+//        printf("%f %d %f\n", expected_bits, (int)all_available_bits, rate_factor);
         if(expected_bits > all_available_bits) rate_factor-= step;
     }
+    free(qscale);
+    free(blured_qscale);
+
+    if(abs(expected_bits/all_available_bits - 1.0) > 0.01 ){
+        fprintf(stderr, "Error: 2pass curve failed to converge\n");
+        return -1;
+    }
 
     return 0;
 }
-
-int ff_rate_estimate_qscale_pass2(MpegEncContext *s)
-{
-    int qmin= s->qmin;
-    int qmax= s->qmax;
-    float q;
-    int qscale;
-    float br_compensation;
-    double diff;
-    int picture_number= s->picture_number;
-    RateControlEntry *rce= &s->rc_context.entry[picture_number];
-    int64_t wanted_bits= rce->expected_bits;
-    emms_c();
-
-//    printf("%d %d %d\n", picture_number, (int)wanted_bits, (int)s->total_bits);
-    
-    if(s->pict_type==B_TYPE){
-        qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
-        qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
-    }
-    if(qmin<1) qmin=1;
-    if(qmax>31) qmax=31;
-    if(qmax<=qmin) qmax= qmin;
-
-    q= rce->new_qscale;
-
-    diff= s->total_bits - wanted_bits;
-    br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance;
-    if(br_compensation<=0.0) br_compensation=0.001;
-    q/=br_compensation;
-
-    qscale= (int)(q + 0.5);
-    if     (qscale<qmin) qscale=qmin;
-    else if(qscale>qmax) qscale=qmax;
-//    printf("%d %d %d %d type:%d\n", qmin, qscale, qmax, picture_number, s->pict_type); fflush(stdout);
-    return qscale;
-}