changeset 764:d4726182dfd2 libavcodec

optimize block_permute() optimize dct_quantize_c() dont permute s->q_inter/intra_matrix
author michaelni
date Wed, 23 Oct 2002 15:11:07 +0000
parents 339db642859d
children 7f0d502a42c5
files dsputil.c dsputil.h mpegvideo.c mpegvideo.h
diffstat 4 files changed, 32 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/dsputil.c	Wed Oct 23 08:14:12 2002 +0000
+++ b/dsputil.c	Wed Oct 23 15:11:07 2002 +0000
@@ -1553,16 +1553,25 @@
     return s;
 }
 
-/* permute block according so that it corresponds to the MMX idct
-   order */
-void block_permute(INT16 *block, UINT8 *permutation)
+void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
 {
-	int i;
-	INT16 temp[64];
+    int i;
+    INT16 temp[64];
+    
+    if(last<=0) return;
+    if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
 
-	for(i=0; i<64; i++) temp[ permutation[i] ] = block[i];
-
-	for(i=0; i<64; i++) block[i] = temp[i];
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        temp[j]= block[j];
+        block[j]=0;
+    }
+    
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        const int perm_j= permutation[j];
+        block[perm_j]= temp[j];
+    }
 }
 
 void clear_blocks_c(DCTELEM *blocks)
--- a/dsputil.h	Wed Oct 23 08:14:12 2002 +0000
+++ b/dsputil.h	Wed Oct 23 15:11:07 2002 +0000
@@ -115,7 +115,11 @@
 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
 
-void block_permute(INT16 *block, UINT8 *permutation);
+/**
+ * permute block according to permutation.
+ * @param last index (in scantable order) of the last non-zero element
+ */
+void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
 
 #if defined(HAVE_MMX)
 
--- a/mpegvideo.c	Wed Oct 23 08:14:12 2002 +0000
+++ b/mpegvideo.c	Wed Oct 23 15:11:07 2002 +0000
@@ -94,7 +94,7 @@
                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
                 
-                qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) / 
+                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / 
                                 (qscale * quant_matrix[j]));
             }
         } else if (s->fdct == fdct_ifast) {
@@ -105,7 +105,7 @@
                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
                 
-                qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / 
+                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / 
                                 (aanscales[i] * qscale * quant_matrix[j]));
             }
         } else {
@@ -138,6 +138,8 @@
 void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){
     int i;
     int end;
+    
+    st->scantable= src_scantable;
 
     for(i=0; i<64; i++){
         int j;
@@ -2968,18 +2970,13 @@
 {
     int i, j, level, last_non_zero, q;
     const int *qmat;
+    const UINT8 *scantable= s->intra_scantable.scantable;
     int bias;
     int max=0;
     unsigned int threshold1, threshold2;
     
     s->fdct (block);
 
-#ifndef ARCH_ALPHA              /* Alpha uses unpermuted matrix */
-    /* we need this permutation so that we correct the IDCT
-       permutation. will be moved into DCT code */
-    block_permute(block, s->idct_permutation); //FIXME remove
-#endif
-
     if (s->mb_intra) {
         if (!s->h263_aic) {
             if (n < 4)
@@ -3007,7 +3004,7 @@
     threshold2= (threshold1<<1);
 
     for(;i<64;i++) {
-        j = s->intra_scantable.permutated[i];
+        j = scantable[i];
         level = block[j];
         level = level * qmat[j];
 
@@ -3029,6 +3026,9 @@
     }
     *overflow= s->max_qcoeff < max; //overflow might have happend
     
+    /* we need this permutation so that we correct the IDCT permutation; we only permute the !=0 elements */
+    ff_block_permute(block, s->idct_permutation, scantable, last_non_zero);
+
     return last_non_zero;
 }
 
--- a/mpegvideo.h	Wed Oct 23 08:14:12 2002 +0000
+++ b/mpegvideo.h	Wed Oct 23 15:11:07 2002 +0000
@@ -100,6 +100,7 @@
 } ReorderBuffer;
 
 typedef struct ScanTable{
+    const UINT8 *scantable;
     UINT8 permutated[64];
     UINT8 raster_end[64];
 } ScanTable;