# HG changeset patch
# User michael
# Date 1066821819 0
# Node ID d736e24bf3033d5e194a522fdd072f143aec3cb9
# Parent  541681146f8322a3c0aac65392c2ff90053386fc
move mmx quantizer matrixes out of MpegEncContext (23k -> 7k) (no meassureable slowdown)

diff -r 541681146f83 -r d736e24bf303 i386/mpegvideo_mmx_template.c
--- a/i386/mpegvideo_mmx_template.c	Wed Oct 22 10:59:39 2003 +0000
+++ b/i386/mpegvideo_mmx_template.c	Wed Oct 22 11:23:39 2003 +0000
@@ -76,12 +76,12 @@
         block[0]=0; //avoid fake overflow
 //        temp_block[0] = (block[0] + (q >> 1)) / q;
         last_non_zero_p1 = 1;
-        bias = s->q_intra_matrix16_bias[qscale];
-        qmat = s->q_intra_matrix16[qscale];
+        bias = s->q_intra_matrix16[qscale][1];
+        qmat = s->q_intra_matrix16[qscale][0];
     } else {
         last_non_zero_p1 = 0;
-        bias = s->q_inter_matrix16_bias[qscale];
-        qmat = s->q_inter_matrix16[qscale];
+        bias = s->q_inter_matrix16[qscale][1];
+        qmat = s->q_inter_matrix16[qscale][0];
     }
 
     if(s->out_format == FMT_H263 && s->mpeg_quant==0){
diff -r 541681146f83 -r d736e24bf303 mpegvideo.c
--- a/mpegvideo.c	Wed Oct 22 10:59:39 2003 +0000
+++ b/mpegvideo.c	Wed Oct 22 11:23:39 2003 +0000
@@ -93,7 +93,7 @@
 
 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
 
-static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
+static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
 {
     int qscale;
@@ -132,10 +132,10 @@
                 */
                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
-                qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
-
-                if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
-                qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
+                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
+
+                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
+                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
             }
         }
     }
@@ -445,6 +445,8 @@
         
         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
+        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
+        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
     }
         
     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
@@ -565,6 +567,8 @@
     av_freep(&s->lambda_table);
     av_freep(&s->q_intra_matrix);
     av_freep(&s->q_inter_matrix);
+    av_freep(&s->q_intra_matrix16);
+    av_freep(&s->q_inter_matrix16);
 
     for(i=0; i<MAX_PICTURE_COUNT; i++){
         free_picture(s, &s->picture[i]);
@@ -882,9 +886,9 @@
     /* precompute matrix */
     /* for mjpeg, we do include qscale in the matrix */
     if (s->out_format != FMT_MJPEG) {
-        convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
+        convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
                        s->intra_matrix, s->intra_quant_bias, 1, 31);
-        convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
+        convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, 
                        s->inter_matrix, s->inter_quant_bias, 1, 31);
     }
 
@@ -3502,7 +3506,7 @@
             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
         }
         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
-                       s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
+                       s->intra_matrix, s->intra_quant_bias, 8, 8);
     }
     
     //FIXME var duplication
diff -r 541681146f83 -r d736e24bf303 mpegvideo.h
--- a/mpegvideo.h	Wed Oct 22 10:59:39 2003 +0000
+++ b/mpegvideo.h	Wed Oct 22 11:23:39 2003 +0000
@@ -459,11 +459,9 @@
     /** precomputed matrix (combine qscale and DCT renorm) */
     int (*q_intra_matrix)[64];
     int (*q_inter_matrix)[64];
-    /** identical to the above but for MMX & these are not permutated */
-    uint16_t __align8 q_intra_matrix16[32][64];
-    uint16_t __align8 q_inter_matrix16[32][64];
-    uint16_t __align8 q_intra_matrix16_bias[32][64];
-    uint16_t __align8 q_inter_matrix16_bias[32][64];
+    /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/
+    uint16_t (*q_intra_matrix16)[2][64];
+    uint16_t (*q_inter_matrix16)[2][64];
     int block_last_index[6];  ///< last non zero coefficient in block
     /* scantables */
     ScanTable __align8 intra_scantable;